From 87ffa60454a4aede0e7123e8fc01e9ba9d64ee37 Mon Sep 17 00:00:00 2001
From: Jacob Young
Date: Sat, 11 Mar 2017 04:59:18 -0500
Subject: [PATCH] Merged upstream, updated tests.

---
 .gitignore                                    |     3 +
 CMakeLists.txt                                |    84 +-
 LICENSE.TXT                                   |     2 +-
 include/llvm/ADT/Triple.h                     |    23 +-
 include/llvm/CodeGen/ISDOpcodes.h             |    37 +-
 include/llvm/CodeGen/MachineBasicBlock.h      |    56 +-
 include/llvm/CodeGen/MachineValueType.h       |    14 +-
 include/llvm/CodeGen/RuntimeLibcalls.h        |    11 +
 include/llvm/CodeGen/SelectionDAG.h           |    84 +-
 include/llvm/MC/MCAsmInfo.h                   |   140 +-
 include/llvm/Support/ELF.h                    |    41 +-
 include/llvm/Support/MathExtras.h             |    40 +-
 include/llvm/Target/TargetLowering.h          |   193 +-
 lib/CodeGen/CodeGenPrepare.cpp                |   713 +-
 lib/CodeGen/InlineSpiller.cpp                 |    10 +-
 lib/CodeGen/MachineBasicBlock.cpp             |    71 +-
 lib/CodeGen/MachineFunction.cpp               |     3 +-
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp      |  1236 +-
 lib/CodeGen/SelectionDAG/FastISel.cpp         |    57 +-
 .../SelectionDAG/FunctionLoweringInfo.cpp     |    71 +-
 lib/CodeGen/SelectionDAG/InstrEmitter.cpp     |    32 +-
 lib/CodeGen/SelectionDAG/LegalizeDAG.cpp      |    59 +-
 .../SelectionDAG/LegalizeIntegerTypes.cpp     |    40 +-
 lib/CodeGen/SelectionDAG/LegalizeTypes.cpp    |    31 +-
 lib/CodeGen/SelectionDAG/LegalizeTypes.h      |    11 +-
 .../SelectionDAG/LegalizeVectorTypes.cpp      |   201 +-
 lib/CodeGen/SelectionDAG/SelectionDAG.cpp     |   247 +-
 .../SelectionDAG/SelectionDAGBuilder.cpp      |  1002 +-
 .../SelectionDAG/SelectionDAGBuilder.h        |    87 +-
 lib/CodeGen/SelectionDAG/TargetLowering.cpp   |   124 +-
 lib/CodeGen/TargetLoweringBase.cpp            |    38 +-
 lib/MC/MCAsmInfo.cpp                          |    60 +-
 lib/MC/MCAsmStreamer.cpp                      |    27 +-
 lib/MC/MCELFStreamer.cpp                      |    78 +-
 lib/MC/MCMachOStreamer.cpp                    |    49 +-
 lib/MC/MCParser/AsmParser.cpp                 |    79 +-
 lib/MC/MCStreamer.cpp                         |    52 +-
 lib/Support/APInt.cpp                         |   157 +-
 lib/Support/Triple.cpp                        |     5 +
 lib/Target/AArch64/AArch64ISelLowering.cpp    |   380 +-
 lib/Target/AMDGPU/AMDGPUISelLowering.cpp      |   556 +-
 lib/Target/AMDGPU/R600ISelLowering.cpp        |   350 +-
 .../ARM/MCTargetDesc/ARMELFStreamer.cpp       |    99 +-
 lib/Target/X86/X86FastISel.cpp                |    12 +-
 lib/Target/X86/X86ISelLowering.cpp            |  4155 ++++--
 lib/Target/Z80/Z80ISelDAGToDAG.cpp            |     9 +-
 lib/Target/Z80/Z80MCInstLower.cpp             |     2 +
 test/CodeGen/Hexagon/adde.ll                  |    49 +-
 test/CodeGen/MSP430/Inst8rr.ll                |    10 +-
 .../CodeGen/Mips/dynamic-stack-realignment.ll |   299 +
 test/CodeGen/NVPTX/add-128bit.ll              |     2 +-
 test/CodeGen/X86/adde-carry.ll                |   194 +
 test/CodeGen/X86/promote-vec3.ll              |   139 +
 test/CodeGen/X86/vselect-pcmp.ll              |   323 +
 test/CodeGen/X86/widen_bitops-0.ll            |   307 +
 tools/clang/include/clang/AST/ASTContext.h    |    42 +-
 .../clang/include/clang/AST/BuiltinTypes.def  |     3 -
 tools/clang/include/clang/AST/TypeLoc.h       |    75 +-
 tools/clang/include/clang/Basic/TargetInfo.h  |    29 +-
 tools/clang/include/clang/Sema/DeclSpec.h     |    42 +-
 .../include/clang/Serialization/ASTBitCodes.h |    54 +-
 tools/clang/lib/AST/ASTContext.cpp            |   170 +-
 tools/clang/lib/AST/ExprConstant.cpp          |  1117 +-
 tools/clang/lib/AST/ItaniumMangle.cpp         |    29 +-
 tools/clang/lib/AST/MicrosoftMangle.cpp       |    84 +-
 tools/clang/lib/AST/NSAPI.cpp                 |     1 -
 tools/clang/lib/AST/Type.cpp                  |    66 +-
 tools/clang/lib/AST/TypeLoc.cpp               |     1 -
 tools/clang/lib/Basic/TargetInfo.cpp          |     6 +-
 tools/clang/lib/Basic/Targets.cpp             |   829 +-
 tools/clang/lib/CodeGen/CGDebugInfo.cpp       |   344 +-
 tools/clang/lib/CodeGen/CodeGenTypes.cpp      |     6 +-
 tools/clang/lib/CodeGen/ItaniumCXXABI.cpp     |    97 +-
 tools/clang/lib/CodeGen/TargetInfo.cpp        |   296 +-
 tools/clang/lib/Driver/Tools.cpp              | 12162 ----------------
 tools/clang/lib/Format/FormatToken.cpp        |    12 +-
 tools/clang/lib/Frontend/InitPreprocessor.cpp |    15 +-
 tools/clang/lib/Index/USRGeneration.cpp       |    20 +-
 tools/clang/lib/Parse/ParseDecl.cpp           |   346 +-
 tools/clang/lib/Parse/ParseExprCXX.cpp        |    71 +-
 tools/clang/lib/Sema/DeclSpec.cpp             |     2 +-
 tools/clang/lib/Sema/Sema.cpp                 |   179 +-
 tools/clang/lib/Sema/SemaDecl.cpp             |   628 +-
 tools/clang/lib/Sema/SemaOverload.cpp         |   713 +-
 tools/clang/lib/Sema/SemaTemplateVariadic.cpp |    27 +-
 tools/clang/lib/Sema/SemaType.cpp             |   406 +-
 tools/clang/lib/Sema/TreeTransform.h          |   709 +-
 tools/clang/lib/Serialization/ASTCommon.cpp   |     5 +-
 tools/clang/lib/Serialization/ASTReader.cpp   |  1637 ++-
 tools/clang/lib/Serialization/ASTWriter.cpp   |   430 +-
 tools/clang/test/Driver/sanitizer-ld.c        |    96 +-
 91 files changed, 14695 insertions(+), 18208 deletions(-)
 create mode 100644 test/CodeGen/Mips/dynamic-stack-realignment.ll
 create mode 100644 test/CodeGen/X86/adde-carry.ll
 create mode 100644 test/CodeGen/X86/promote-vec3.ll
 create mode 100644 test/CodeGen/X86/vselect-pcmp.ll
 create mode 100644 test/CodeGen/X86/widen_bitops-0.ll
 delete mode 100644 tools/clang/lib/Driver/Tools.cpp

diff --git a/.gitignore b/.gitignore
index e8588a6..e114415 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,6 +21,9 @@
 #OS X specific files.
 .DS_store

+# Nested build directory
+/build
+
 #==============================================================================#
 # Explicit files to ignore (only matches one).
 #==============================================================================#
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 59dcc2e..a724f9b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -20,7 +20,7 @@ if(POLICY CMP0057)
 endif()

 if(NOT DEFINED LLVM_VERSION_MAJOR)
-  set(LLVM_VERSION_MAJOR 4)
+  set(LLVM_VERSION_MAJOR 5)
 endif()
 if(NOT DEFINED LLVM_VERSION_MINOR)
   set(LLVM_VERSION_MINOR 0)
@@ -56,17 +56,20 @@ endif()

 # This should only apply if you are both on an Apple host, and targeting Apple.
 if(CMAKE_HOST_APPLE AND APPLE)
-  if(NOT CMAKE_XCRUN)
-    find_program(CMAKE_XCRUN NAMES xcrun)
-  endif()
-  if(CMAKE_XCRUN)
-    execute_process(COMMAND ${CMAKE_XCRUN} -find libtool
-      OUTPUT_VARIABLE CMAKE_LIBTOOL
-      OUTPUT_STRIP_TRAILING_WHITESPACE)
-  endif()
+  # if CMAKE_LIBTOOL is not set, try and find it with xcrun or find_program
+  if(NOT CMAKE_LIBTOOL)
+    if(NOT CMAKE_XCRUN)
+      find_program(CMAKE_XCRUN NAMES xcrun)
+    endif()
+    if(CMAKE_XCRUN)
+      execute_process(COMMAND ${CMAKE_XCRUN} -find libtool
+        OUTPUT_VARIABLE CMAKE_LIBTOOL
+        OUTPUT_STRIP_TRAILING_WHITESPACE)
+    endif()

-  if(NOT CMAKE_LIBTOOL OR NOT EXISTS CMAKE_LIBTOOL)
-    find_program(CMAKE_LIBTOOL NAMES libtool)
+    if(NOT CMAKE_LIBTOOL OR NOT EXISTS CMAKE_LIBTOOL)
+      find_program(CMAKE_LIBTOOL NAMES libtool)
+    endif()
   endif()

   get_property(languages GLOBAL PROPERTY ENABLED_LANGUAGES)
@@ -132,18 +135,6 @@ foreach(proj ${LLVM_ENABLE_PROJECTS})
   endif()
 endforeach()

-# The following only works with the Ninja generator in CMake >= 3.0.
-set(LLVM_PARALLEL_COMPILE_JOBS "" CACHE STRING
-  "Define the maximum number of concurrent compilation jobs.")
-if(LLVM_PARALLEL_COMPILE_JOBS)
-  if(NOT CMAKE_MAKE_PROGRAM MATCHES "ninja")
-    message(WARNING "Job pooling is only available with Ninja generators.")
-  else()
-    set_property(GLOBAL APPEND PROPERTY JOB_POOLS compile_job_pool=${LLVM_PARALLEL_COMPILE_JOBS})
-    set(CMAKE_JOB_POOL_COMPILE compile_job_pool)
-  endif()
-endif()
-
 # Build llvm with ccache if the package is present
 set(LLVM_CCACHE_BUILD OFF CACHE BOOL "Set to ON for a ccache enabled build")
 if(LLVM_CCACHE_BUILD)
@@ -178,21 +169,12 @@ if(LLVM_DEPENDENCY_DEBUGGING)
   endif()
 endif()

-option(LLVM_BUILD_GLOBAL_ISEL "Experimental: Build GlobalISel" OFF)
+option(LLVM_BUILD_GLOBAL_ISEL "Experimental: Build GlobalISel" ON)
 if(LLVM_BUILD_GLOBAL_ISEL)
   add_definitions(-DLLVM_BUILD_GLOBAL_ISEL)
 endif()

-set(LLVM_PARALLEL_LINK_JOBS "" CACHE STRING
-  "Define the maximum number of concurrent link jobs.")
-if(LLVM_PARALLEL_LINK_JOBS)
-  if(NOT CMAKE_MAKE_PROGRAM MATCHES "ninja")
-    message(WARNING "Job pooling is only available with Ninja generators.")
-  else()
-    set_property(GLOBAL APPEND PROPERTY JOB_POOLS link_job_pool=${LLVM_PARALLEL_LINK_JOBS})
-    set(CMAKE_JOB_POOL_LINK link_job_pool)
-  endif()
-endif()
+option(LLVM_ENABLE_DAGISEL_COV "Debug: Prints tablegen patterns that were used for selecting" OFF)

 # Add path for custom modules
 set(CMAKE_MODULE_PATH
@@ -415,9 +397,6 @@ option(LLVM_ENABLE_EXPENSIVE_CHECKS "Enable expensive checks" OFF)
 set(LLVM_ABI_BREAKING_CHECKS "WITH_ASSERTS" CACHE STRING
   "Enable abi-breaking checks. Can be WITH_ASSERTS, FORCE_ON or FORCE_OFF.")

-option(LLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING
-  "Disable abi-breaking checks mismatch detection at link-tim." OFF)
-
 option(LLVM_FORCE_USE_OLD_HOST_TOOLCHAIN
   "Set to ON to force using an old, unsupported host toolchain." OFF)

@@ -738,6 +717,30 @@ configure_file(
   ${LLVM_MAIN_INCLUDE_DIR}/llvm/Support/DataTypes.h.cmake
   ${LLVM_INCLUDE_DIR}/llvm/Support/DataTypes.h)

+# Add target for generating source rpm package.
+set(LLVM_SRPM_USER_BINARY_SPECFILE ${CMAKE_CURRENT_SOURCE_DIR}/llvm.spec.in
+  CACHE FILEPATH ".spec file to use for srpm generation")
+set(LLVM_SRPM_BINARY_SPECFILE ${CMAKE_CURRENT_BINARY_DIR}/llvm.spec)
+set(LLVM_SRPM_DIR "${CMAKE_CURRENT_BINARY_DIR}/srpm")
+
+# SVN_REVISION and GIT_COMMIT get set by the call to add_version_info_from_vcs.
+# DUMMY_VAR contains a version string which we don't care about.
+add_version_info_from_vcs(DUMMY_VAR)
+if ( SVN_REVISION )
+  set(LLVM_RPM_SPEC_REVISION "r${SVN_REVISION}")
+elseif ( GIT_COMMIT )
+  set (LLVM_RPM_SPEC_REVISION "g${GIT_COMMIT}")
+endif()
+
+configure_file(
+  ${LLVM_SRPM_USER_BINARY_SPECFILE}
+  ${LLVM_SRPM_BINARY_SPECFILE} @ONLY)
+
+add_custom_target(srpm
+  COMMAND cpack -G TGZ --config CPackSourceConfig.cmake -B ${LLVM_SRPM_DIR}/SOURCES
+  COMMAND rpmbuild -bs --define '_topdir ${LLVM_SRPM_DIR}' ${LLVM_SRPM_BINARY_SPECFILE})
+
+
 # They are not referenced. See set_output_directory().
 set( CMAKE_RUNTIME_OUTPUT_DIRECTORY ${LLVM_BINARY_DIR}/bin )
 set( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${LLVM_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX} )
@@ -879,7 +882,7 @@ if( LLVM_INCLUDE_TESTS )
   endif()
   add_subdirectory(test)
   add_subdirectory(unittests)
-  if (MSVC)
+  if (WIN32)
     # This utility is used to prevent crashing tests from calling Dr. Watson on
     # Windows.
     add_subdirectory(utils/KillTheDoctor)
@@ -979,3 +982,8 @@ if(LLVM_DISTRIBUTION_COMPONENTS)
     endif()
   endforeach()
 endif()
+
+# This allows us to deploy the Universal CRT DLLs by passing -DCMAKE_INSTALL_UCRT_LIBRARIES=ON to CMake
+if (MSVC)
+  include(InstallRequiredSystemLibraries)
+endif()
diff --git a/LICENSE.TXT b/LICENSE.TXT
index 555c8bb..ff63f2b 100644
--- a/LICENSE.TXT
+++ b/LICENSE.TXT
@@ -4,7 +4,7 @@ LLVM Release License
 University of Illinois/NCSA
 Open Source License

-Copyright (c) 2003-2016 University of Illinois at Urbana-Champaign.
+Copyright (c) 2003-2017 University of Illinois at Urbana-Champaign.
 All rights reserved.

 Developed by:
diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h
index f1ac5ef..be359da 100644
--- a/include/llvm/ADT/Triple.h
+++ b/include/llvm/ADT/Triple.h
@@ -112,6 +112,7 @@ class Triple {
     ARMSubArch_v7m,
     ARMSubArch_v7s,
     ARMSubArch_v7k,
+    ARMSubArch_v7ve,
     ARMSubArch_v6,
     ARMSubArch_v6m,
     ARMSubArch_v6k,
@@ -208,6 +209,7 @@ class Triple {
     COFF,
     ELF,
     MachO,
+    Wasm,
   };

 private:
@@ -560,7 +562,8 @@ class Triple {

   /// Tests whether the OS uses glibc.
   bool isOSGlibc() const {
-    return getOS() == Triple::Linux || getOS() == Triple::KFreeBSD;
+    return (getOS() == Triple::Linux || getOS() == Triple::KFreeBSD) &&
+           !isAndroid();
   }

   /// Tests whether the OS uses the ELF binary format.
@@ -578,6 +581,11 @@ class Triple {
     return getObjectFormat() == Triple::MachO;
   }

+  /// Tests whether the OS uses the Wasm binary format.
+  bool isOSBinFormatWasm() const {
+    return getObjectFormat() == Triple::Wasm;
+  }
+
   /// Tests whether the target is the PS4 CPU
   bool isPS4CPU() const {
     return getArch() == Triple::x86_64 &&
@@ -594,6 +602,19 @@ class Triple {
   /// Tests whether the target is Android
   bool isAndroid() const { return getEnvironment() == Triple::Android; }

+  bool isAndroidVersionLT(unsigned Major) const {
+    assert(isAndroid() && "Not an Android triple!");
+
+    unsigned Env[3];
+    getEnvironmentVersion(Env[0], Env[1], Env[2]);
+
+    // 64-bit targets did not exist before API level 21 (Lollipop).
+    if (isArch64Bit() && Env[0] < 21)
+      Env[0] = 21;
+
+    return Env[0] < Major;
+  }
+
   /// Tests whether the environment is musl-libc
   bool isMusl() const {
     return getEnvironment() == Triple::Musl ||
diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h
index 420b03e..f943e48 100644
--- a/include/llvm/CodeGen/ISDOpcodes.h
+++ b/include/llvm/CodeGen/ISDOpcodes.h
@@ -245,6 +245,12 @@ namespace ISD {
     /// Simple binary floating point operators.
     FADD, FSUB, FMUL, FDIV, FREM,

+    /// Constrained versions of the binary floating point operators.
+    /// These will be lowered to the simple operators before final selection.
+    /// They are used to limit optimizations while the DAG is being
+    /// optimized.
+    STRICT_FADD, STRICT_FSUB, STRICT_FMUL, STRICT_FDIV, STRICT_FREM,
+
     /// FMA - Perform a * b + c with no intermediate rounding step.
     FMA,

@@ -281,7 +287,8 @@ namespace ISD {
     /// EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR
     /// identified by the (potentially variable) element number IDX. If the
     /// return type is an integer type larger than the element type of the
-    /// vector, the result is extended to the width of the return type.
+    /// vector, the result is extended to the width of the return type. In
+    /// that case, the high bits are undefined.
     EXTRACT_VECTOR_ELT,

     /// CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of
@@ -503,19 +510,6 @@ namespace ISD {
     /// address spaces.
     ADDRSPACECAST,

-    /// CONVERT_RNDSAT - This operator is used to support various conversions
-    /// between various types (float, signed, unsigned and vectors of those
-    /// types) with rounding and saturation. NOTE: Avoid using this operator as
-    /// most target don't support it and the operator might be removed in the
-    /// future. It takes the following arguments:
-    ///   0) value
-    ///   1) dest type (type to convert to)
-    ///   2) src type (type to convert from)
-    ///   3) rounding imm
-    ///   4) saturation imm
-    ///   5) ISD::CvtCode indicating the type of conversion to do
-    CONVERT_RNDSAT,
-
     /// FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions
     /// and truncation for half-precision (16 bit) floating numbers. These nodes
     /// form a semi-softened interface for dealing with f16 (as an i16), which
@@ -927,21 +921,6 @@ namespace ISD {
   /// SETCC_INVALID if it is not possible to represent the resultant comparison.
   CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, bool isInteger);

-  //===--------------------------------------------------------------------===//
-  /// This enum defines the various converts CONVERT_RNDSAT supports.
-  enum CvtCode {
-    CVT_FF,     /// Float from Float
-    CVT_FS,     /// Float from Signed
-    CVT_FU,     /// Float from Unsigned
-    CVT_SF,     /// Signed from Float
-    CVT_UF,     /// Unsigned from Float
-    CVT_SS,     /// Signed from Signed
-    CVT_SU,     /// Signed from Unsigned
-    CVT_US,     /// Unsigned from Signed
-    CVT_UU,     /// Unsigned from Unsigned
-    CVT_INVALID /// Marker - Invalid opcode
-  };
-
 } // end llvm::ISD namespace

 } // end llvm namespace
diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h
index 2a97419..544497a 100644
--- a/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/include/llvm/CodeGen/MachineBasicBlock.h
@@ -19,6 +19,7 @@
 #include "llvm/CodeGen/MachineInstrBundleIterator.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/Support/BranchProbability.h"
+#include "llvm/MC/LaneBitmask.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Support/DataTypes.h"
 #include <functional>
@@ -35,9 +36,6 @@
 class StringRef;
 class raw_ostream;
 class MachineBranchProbabilityInfo;

-// Forward declaration to avoid circular include problem with TargetRegisterInfo
-typedef unsigned LaneBitmask;
-
 template <> struct ilist_traits<MachineInstr> {
 private:
   friend class MachineBasicBlock; // Set by the owning MachineBasicBlock.
@@ -130,7 +128,7 @@ class MachineBasicBlock
   /// to an LLVM basic block.
   const BasicBlock *getBasicBlock() const { return BB; }

-  /// Return the name of the corresponding LLVM basic block, or "(null)".
+  /// Return the name of the corresponding LLVM basic block, or an empty string.
   StringRef getName() const;

   /// Return a formatted string to identify this block and its parent function.
@@ -278,7 +276,8 @@ class MachineBasicBlock
   /// Adds the specified register as a live in. Note that it is an error to add
   /// the same register to the same set more than once unless the intention is
   /// to call sortUniqueLiveIns after all registers are added.
-  void addLiveIn(MCPhysReg PhysReg, LaneBitmask LaneMask = ~0u) {
+  void addLiveIn(MCPhysReg PhysReg,
+                 LaneBitmask LaneMask = LaneBitmask::getAll()) {
     LiveIns.push_back(RegisterMaskPair(PhysReg, LaneMask));
   }
   void addLiveIn(const RegisterMaskPair &RegMaskPair) {
@@ -290,21 +289,36 @@ class MachineBasicBlock
   /// LiveIn insertion.
   void sortUniqueLiveIns();

+  /// Clear live in list.
+  void clearLiveIns();
+
   /// Add PhysReg as live in to this block, and ensure that there is a copy of
   /// PhysReg to a virtual register of class RC. Return the virtual register
   /// that is a copy of the live in PhysReg.
   unsigned addLiveIn(MCPhysReg PhysReg, const TargetRegisterClass *RC);

   /// Remove the specified register from the live in set.
-  void removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask = ~0u);
+  void removeLiveIn(MCPhysReg Reg,
+                    LaneBitmask LaneMask = LaneBitmask::getAll());

   /// Return true if the specified register is in the live in set.
-  bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask = ~0u) const;
+  bool isLiveIn(MCPhysReg Reg,
+                LaneBitmask LaneMask = LaneBitmask::getAll()) const;

   // Iteration support for live in sets. These sets are kept in sorted
   // order by their register number.
   typedef LiveInVector::const_iterator livein_iterator;
-  livein_iterator livein_begin() const { return LiveIns.begin(); }
+#ifndef NDEBUG
+  /// Unlike livein_begin, this method does not check that the liveness
+  /// information is accurate. Still for debug purposes it may be useful
+  /// to have iterators that won't assert if the liveness information
+  /// is not current.
+  livein_iterator livein_begin_dbg() const { return LiveIns.begin(); }
+  iterator_range<livein_iterator> liveins_dbg() const {
+    return make_range(livein_begin_dbg(), livein_end());
+  }
+#endif
+  livein_iterator livein_begin() const;
   livein_iterator livein_end() const { return LiveIns.end(); }
   bool livein_empty() const { return LiveIns.empty(); }
   iterator_range<livein_iterator> liveins() const {
@@ -650,6 +664,10 @@ class MachineBasicBlock
     return findDebugLoc(MBBI.getInstrIterator());
   }

+  /// Find and return the merged DebugLoc of the branch instructions of the
+  /// block. Return UnknownLoc if there is none.
+  DebugLoc findBranchDebugLoc();
+
   /// Possible outcome of a register liveness query to computeRegisterLiveness()
   enum LivenessQueryResult {
     LQR_Live, ///< Register is known to be (at least partially) live.
@@ -805,6 +823,28 @@ class MachineInstrSpan {
   MachineBasicBlock::iterator getInitial() { return I; }
 };

+/// Increment \p It until it points to a non-debug instruction or to \p End
+/// and return the resulting iterator. This function should only be used with
+/// MachineBasicBlock::{iterator, const_iterator, instr_iterator,
+/// const_instr_iterator} and the respective reverse iterators.
+template <typename IterT>
+inline IterT skipDebugInstructionsForward(IterT It, IterT End) {
+  while (It != End && It->isDebugValue())
+    It++;
+  return It;
+}
+
+/// Decrement \p It until it points to a non-debug instruction or to \p Begin
+/// and return the resulting iterator. This function should only be used with
+/// MachineBasicBlock::{iterator, const_iterator, instr_iterator,
+/// const_instr_iterator} and the respective reverse iterators.
+template <typename IterT>
+inline IterT skipDebugInstructionsBackward(IterT It, IterT Begin) {
+  while (It != Begin && It->isDebugValue())
+    It--;
+  return It;
+}
+
 } // End llvm namespace

 #endif
diff --git a/include/llvm/CodeGen/MachineValueType.h b/include/llvm/CodeGen/MachineValueType.h
index d6adf40..8226384 100644
--- a/include/llvm/CodeGen/MachineValueType.h
+++ b/include/llvm/CodeGen/MachineValueType.h
@@ -234,17 +234,17 @@ namespace llvm {

     /// is32BitVector - Return true if this is a 32-bit vector type.
     bool is32BitVector() const {
-      return (SimpleTy == MVT::v4i8  || SimpleTy == MVT::v2i16 ||
-              SimpleTy == MVT::v1i32 || SimpleTy == MVT::v2f16 ||
-              SimpleTy == MVT::v1f32);
+      return (SimpleTy == MVT::v32i1 || SimpleTy == MVT::v4i8  ||
+              SimpleTy == MVT::v2i16 || SimpleTy == MVT::v1i32 ||
+              SimpleTy == MVT::v2f16 || SimpleTy == MVT::v1f32);
     }

     /// is64BitVector - Return true if this is a 64-bit vector type.
     bool is64BitVector() const {
-      return (SimpleTy == MVT::v8i8  || SimpleTy == MVT::v4i16 ||
-              SimpleTy == MVT::v2i32 || SimpleTy == MVT::v1i64 ||
-              SimpleTy == MVT::v4f16 || SimpleTy == MVT::v2f32 ||
-              SimpleTy == MVT::v1f64);
+      return (SimpleTy == MVT::v64i1 || SimpleTy == MVT::v8i8  ||
+              SimpleTy == MVT::v4i16 || SimpleTy == MVT::v2i32 ||
+              SimpleTy == MVT::v1i64 || SimpleTy == MVT::v4f16 ||
+              SimpleTy == MVT::v2f32 || SimpleTy == MVT::v1f64);
     }

     /// is128BitVector - Return true if this is a 128-bit vector type.
diff --git a/include/llvm/CodeGen/RuntimeLibcalls.h b/include/llvm/CodeGen/RuntimeLibcalls.h
index 693a707..357c5cc 100644
--- a/include/llvm/CodeGen/RuntimeLibcalls.h
+++ b/include/llvm/CodeGen/RuntimeLibcalls.h
@@ -387,6 +387,13 @@ namespace RTLIB {
     MEMSET,
     MEMMOVE,

+    // ELEMENT-WISE ATOMIC MEMORY
+    MEMCPY_ELEMENT_ATOMIC_1,
+    MEMCPY_ELEMENT_ATOMIC_2,
+    MEMCPY_ELEMENT_ATOMIC_4,
+    MEMCPY_ELEMENT_ATOMIC_8,
+    MEMCPY_ELEMENT_ATOMIC_16,
+
     // EXCEPTION HANDLING
     UNWIND_RESUME,

@@ -557,6 +564,10 @@ namespace RTLIB {
   /// Return the SYNC_FETCH_AND_* value for the given opcode and type, or
   /// UNKNOWN_LIBCALL if there is none.
   Libcall getSYNC(unsigned Opc, MVT VT);
+
+  /// getMEMCPY_ELEMENT_ATOMIC - Return MEMCPY_ELEMENT_ATOMIC_* value for the
+  /// given element size or UNKNOWN_LIBCALL if there is none.
+  Libcall getMEMCPY_ELEMENT_ATOMIC(uint64_t ElementSize);
 }
 }
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index 3b6d518..cd4567c 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -239,7 +239,7 @@ class SelectionDAG {
     std::function<void(SDNode *, SDNode *)> Callback;
     DAGNodeDeletedListener(SelectionDAG &DAG,
                            std::function<void(SDNode *, SDNode *)> Callback)
-        : DAGUpdateListener(DAG), Callback(Callback) {}
+        : DAGUpdateListener(DAG), Callback(std::move(Callback)) {}
     void NodeDeleted(SDNode *N, SDNode *E) override { Callback(N, E); }
   };

@@ -480,6 +480,13 @@ class SelectionDAG {
                       bool isTarget = false, bool isOpaque = false);
   SDValue getConstant(const APInt &Val, const SDLoc &DL, EVT VT,
                       bool isTarget = false, bool isOpaque = false);
+
+  SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget = false,
+                             bool IsOpaque = false) {
+    return getConstant(APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL,
+                       VT, IsTarget, IsOpaque);
+  }
+
   SDValue getConstant(const ConstantInt &Val, const SDLoc &DL, EVT VT,
                       bool isTarget = false, bool isOpaque = false);
   SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL,
@@ -626,12 +633,6 @@ class SelectionDAG {

   SDValue getCondCode(ISD::CondCode Cond);

-  /// Returns the ConvertRndSat Note: Avoid using this node because it may
-  /// disappear in the future and most targets don't support it.
-  SDValue getConvertRndSat(EVT VT, const SDLoc &dl, SDValue Val, SDValue DTy,
-                           SDValue STy, SDValue Rnd, SDValue Sat,
-                           ISD::CvtCode Code);
-
   /// Return an ISD::VECTOR_SHUFFLE node. The number of elements in VT,
   /// which must be a vector type, must match the number of mask elements
   /// NumElts. An integer mask element equal to -1 is treated as undefined.
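
The DAGNodeDeletedListener hunk above switches the constructor to take its std::function by value and move it into the member; a caller passing a temporary then pays for one move of the closure state instead of a copy. A minimal standalone sketch of the same pattern, using a hypothetical Listener type rather than the LLVM class:

    #include <functional>
    #include <iostream>
    #include <utility>

    // Hypothetical stand-in for DAGNodeDeletedListener: accept the
    // std::function by value, then move it into the member, so callers
    // passing a temporary incur a move instead of a copy.
    struct Listener {
      std::function<void(int)> Callback;

      explicit Listener(std::function<void(int)> Callback)
          : Callback(std::move(Callback)) {}

      void notify(int Node) const { Callback(Node); }
    };

    int main() {
      Listener L([](int Node) { std::cout << "deleted node " << Node << '\n'; });
      L.notify(42); // prints "deleted node 42"
    }
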
@@ -964,6 +965,14 @@ class SelectionDAG {
                           ArrayRef<SDValue> Ops, MachineMemOperand *MMO);
   SDValue getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
                            ArrayRef<SDValue> Ops, MachineMemOperand *MMO);
+
+  /// Return (create a new or find existing) a target-specific node.
+  /// TargetMemSDNode should be derived class from MemSDNode.
+  template <class TargetMemSDNode>
+  SDValue getTargetMemSDNode(SDVTList VTs, ArrayRef<SDValue> Ops,
+                             const SDLoc &dl, EVT MemVT,
+                             MachineMemOperand *MMO);
+
   /// Construct a node to track a Value* through the backend.
   SDValue getSrcValue(const Value *v);

@@ -1272,6 +1281,19 @@ class SelectionDAG {
   void computeKnownBits(SDValue Op, APInt &KnownZero, APInt &KnownOne,
                         const APInt &DemandedElts, unsigned Depth = 0) const;

+  /// Used to represent the possible overflow behavior of an operation.
+  /// Never: the operation cannot overflow.
+  /// Always: the operation will always overflow.
+  /// Sometime: the operation may or may not overflow.
+  enum OverflowKind {
+    OFK_Never,
+    OFK_Sometime,
+    OFK_Always,
+  };
+
+  /// Determine if the result of the addition of 2 nodes can overflow.
+  OverflowKind computeOverflowKind(SDValue N0, SDValue N1) const;
+
   /// Test if the given value is known to have exactly one bit set. This differs
   /// from computeKnownBits in that it doesn't necessarily determine which bit
   /// is set.
@@ -1362,6 +1384,16 @@ class SelectionDAG {
   /// Test whether the given value is a constant int or similar node.
   SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N);

+  /// Test whether the given value is a constant FP or similar node.
+  SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N);
+
+  /// \returns true if \p N is any kind of constant or build_vector of
+  /// constants, int or float. If a vector, it may not necessarily be a splat.
+  inline bool isConstantValueOfAnyType(SDValue N) {
+    return isConstantIntBuildVectorOrConstantInt(N) ||
+           isConstantFPBuildVectorOrConstantFP(N);
+  }
+
 private:
   void InsertNode(SDNode *N);
   bool RemoveNodeFromCSEMaps(SDNode *N);
@@ -1371,7 +1403,7 @@ class SelectionDAG {
                              void *&InsertPos);
   SDNode *FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops,
                                void *&InsertPos);
-  SDNode *UpdadeSDLocOnMergedSDNode(SDNode *N, const SDLoc &loc);
+  SDNode *UpdateSDLocOnMergeSDNode(SDNode *N, const SDLoc &loc);

   void DeleteNodeNotInCSEMaps(SDNode *N);
   void DeallocateNode(SDNode *N);
@@ -1418,6 +1450,42 @@ template <> struct GraphTraits<SelectionDAG*> : public GraphTraits<SDNode*> {
   }
 };

+template <class TargetMemSDNode>
+SDValue SelectionDAG::getTargetMemSDNode(SDVTList VTs,
+                                         ArrayRef<SDValue> Ops,
+                                         const SDLoc &dl, EVT MemVT,
+                                         MachineMemOperand *MMO) {
+
+  /// Compose node ID and try to find an existing node.
+  FoldingSetNodeID ID;
+  unsigned Opcode =
+    TargetMemSDNode(dl.getIROrder(), DebugLoc(), VTs, MemVT, MMO).getOpcode();
+  ID.AddInteger(Opcode);
+  ID.AddPointer(VTs.VTs);
+  for (auto& Op : Ops) {
+    ID.AddPointer(Op.getNode());
+    ID.AddInteger(Op.getResNo());
+  }
+  ID.AddInteger(MemVT.getRawBits());
+  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+  ID.AddInteger(getSyntheticNodeSubclassData<TargetMemSDNode>(
+    dl.getIROrder(), VTs, MemVT, MMO));
+
+  void *IP = nullptr;
+  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+    cast<TargetMemSDNode>(E)->refineAlignment(MMO);
+    return SDValue(E, 0);
+  }
+
+  /// Existing node was not found. Create a new one.
+  auto *N = newSDNode<TargetMemSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+                                       MemVT, MMO);
+  createOperands(N, Ops);
+  CSEMap.InsertNode(N, IP);
+  InsertNode(N);
+  return SDValue(N, 0);
+}
+
 } // end namespace llvm

 #endif
diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h
index 8a68257..756441d 100644
--- a/include/llvm/MC/MCAsmInfo.h
+++ b/include/llvm/MC/MCAsmInfo.h
@@ -16,20 +16,22 @@
 #ifndef LLVM_MC_MCASMINFO_H
 #define LLVM_MC_MCASMINFO_H

+#include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCDirectives.h"
 #include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCTargetOptions.h"
-#include <cctype>
 #include <vector>

 namespace llvm {
+
+class MCContext;
 class MCExpr;
 class MCSection;
 class MCStreamer;
 class MCSymbol;
-class MCContext;

 namespace WinEH {
+
 enum class EncodingType {
   Invalid, /// Invalid
   Alpha,   /// Windows Alpha
@@ -40,11 +42,14 @@ enum class EncodingType {
   X86,     /// Windows x86, uses no CFI, just EH tables
   MIPS = Alpha,
 };
-}
+
+} // end namespace WinEH

 namespace LCOMM {
+
 enum LCOMMType { NoAlignment, ByteAlignment, Log2Alignment };
-}
+
+} // end namespace LCOMM

 enum class DebugCompressionType {
   DCT_None, // no compression
@@ -61,41 +66,41 @@ class MCAsmInfo {
   //

   /// Pointer size in bytes. Default is 4.
-  unsigned PointerSize;
+  unsigned PointerSize = 4;

   /// Size of the stack slot reserved for callee-saved registers, in bytes.
   /// Default is same as pointer size.
-  unsigned CalleeSaveStackSlotSize;
+  unsigned CalleeSaveStackSlotSize = 4;

   /// True if target is little endian. Default is true.
-  bool IsLittleEndian;
+  bool IsLittleEndian = true;

   /// True if target stack grow up. Default is false.
-  bool StackGrowsUp;
+  bool StackGrowsUp = false;

   /// True if this target has the MachO .subsections_via_symbols directive.
   /// Default is false.
-  bool HasSubsectionsViaSymbols;
+  bool HasSubsectionsViaSymbols = false;

   /// True if this is a MachO target that supports the macho-specific .zerofill
   /// directive for emitting BSS Symbols. Default is false.
-  bool HasMachoZeroFillDirective;
+  bool HasMachoZeroFillDirective = false;

   /// True if this is a MachO target that supports the macho-specific .tbss
   /// directive for emitting thread local BSS Symbols. Default is false.
-  bool HasMachoTBSSDirective;
+  bool HasMachoTBSSDirective = false;

   /// This is the maximum possible length of an instruction, which is needed to
   /// compute the size of an inline asm. Defaults to 4.
-  unsigned MaxInstLength;
+  unsigned MaxInstLength = 4;

   /// Every possible instruction length is a multiple of this value. Factored
   /// out in .debug_frame and .debug_line. Defaults to 1.
-  unsigned MinInstAlignment;
+  unsigned MinInstAlignment = 1;

   /// The '$' token, when not referencing an identifier or constant, refers to
   /// the current PC. Defaults to false.
-  bool DollarIsPC;
+  bool DollarIsPC = false;

   /// This string, if specified, is used to separate instructions from each
   /// other when on the same line. Defaults to ';'
@@ -109,10 +114,10 @@ class MCAsmInfo {
   const char *LabelSuffix;

   // Print the EH begin symbol with an assignment. Defaults to false.
-  bool UseAssignmentForEHBegin;
+  bool UseAssignmentForEHBegin = false;

   // Do we need to create a local symbol for .size?
-  bool NeedsLocalForSize;
+  bool NeedsLocalForSize = false;

   /// This prefix is used for globals like constant pool entries that are
   /// completely private to the .s file and should not have names in the .o
@@ -143,20 +148,20 @@ class MCAsmInfo {
   const char *Code64Directive;

   /// Which dialect of an assembler variant to use. Defaults to 0
-  unsigned AssemblerDialect;
+  unsigned AssemblerDialect = 0;

   /// This is true if the assembler allows @ characters in symbol names.
   /// Defaults to false.
-  bool AllowAtInName;
+  bool AllowAtInName = false;

   /// If this is true, symbol names with invalid characters will be printed in
   /// quotes.
-  bool SupportsQuotedNames;
+  bool SupportsQuotedNames = true;

   /// This is true if data region markers should be printed as
   /// ".data_region/.end_data_region" directives. If false, use "$d/$a" labels
   /// instead.
-  bool UseDataRegionDirectives;
+  bool UseDataRegionDirectives = false;

   //===--- Data Emission Directives -------------------------------------===//

@@ -187,13 +192,13 @@ class MCAsmInfo {

   /// If non-null, a directive that is used to emit a word which should be
   /// relocated as a 64-bit GP-relative offset, e.g. .gpdword on Mips. Defaults
-  /// to NULL.
-  const char *GPRel64Directive;
+  /// to nullptr.
+  const char *GPRel64Directive = nullptr;

   /// If non-null, a directive that is used to emit a word which should be
   /// relocated as a 32-bit GP-relative offset, e.g. .gpword on Mips or .gprel32
-  /// on Alpha. Defaults to NULL.
-  const char *GPRel32Directive;
+  /// on Alpha. Defaults to nullptr.
+  const char *GPRel32Directive = nullptr;

   /// If non-null, directives that are used to emit a word/dword which should
   /// be relocated as a 32/64-bit DTP/TP-relative offset, e.g. .dtprelword/
@@ -206,14 +211,14 @@ class MCAsmInfo {
   /// This is true if this target uses "Sun Style" syntax for section switching
   /// ("#alloc,#write" etc) instead of the normal ELF syntax (,"a,w") in
   /// .section directives. Defaults to false.
-  bool SunStyleELFSectionSwitchSyntax;
+  bool SunStyleELFSectionSwitchSyntax = false;

   /// This is true if this target uses ELF '.section' directive before the
   /// '.bss' one. It's used for PPC/Linux which doesn't support the '.bss'
   /// directive only. Defaults to false.
-  bool UsesELFSectionDirectiveForBSS;
+  bool UsesELFSectionDirectiveForBSS = false;

-  bool NeedsDwarfSectionOffsetDirective;
+  bool NeedsDwarfSectionOffsetDirective = false;

   //===--- Alignment Information ----------------------------------------===//

@@ -221,11 +226,11 @@ class MCAsmInfo {
   /// directives, where N is the number of bytes to align to. Otherwise, it
   /// emits ".align log2(N)", e.g. 3 to align to an 8 byte boundary. Defaults
   /// to true.
-  bool AlignmentIsInBytes;
+  bool AlignmentIsInBytes = true;

   /// If non-zero, this is used to fill the executable space created as the
   /// result of a alignment directive. Defaults to 0
-  unsigned TextAlignFillValue;
+  unsigned TextAlignFillValue = 0;

   //===--- Global Variable Emission Directives --------------------------===//

@@ -238,7 +243,7 @@ class MCAsmInfo {
   /// uses a relocation but it can be suppressed by writing
   ///   a = f - g
   ///   .long a
-  bool SetDirectiveSuppressesReloc;
+  bool SetDirectiveSuppressesReloc = false;

   /// False if the assembler requires that we use
   /// \code
@@ -253,98 +258,98 @@ class MCAsmInfo {
   /// \endcode
   ///
   /// Defaults to true.
-  bool HasAggressiveSymbolFolding;
+  bool HasAggressiveSymbolFolding = true;

   /// True is .comm's and .lcomms optional alignment is to be specified in bytes
   /// instead of log2(n). Defaults to true.
-  bool COMMDirectiveAlignmentIsInBytes;
+  bool COMMDirectiveAlignmentIsInBytes = true;

   /// Describes if the .lcomm directive for the target supports an alignment
   /// argument and how it is interpreted. Defaults to NoAlignment.
-  LCOMM::LCOMMType LCOMMDirectiveAlignmentType;
+  LCOMM::LCOMMType LCOMMDirectiveAlignmentType = LCOMM::NoAlignment;

   // True if the target allows .align directives on functions. This is true for
   // most targets, so defaults to true.
-  bool HasFunctionAlignment;
+  bool HasFunctionAlignment = true;

   /// True if the target has .type and .size directives, this is true for most
   /// ELF targets. Defaults to true.
-  bool HasDotTypeDotSizeDirective;
+  bool HasDotTypeDotSizeDirective = true;

   /// True if the target has a single parameter .file directive, this is true
   /// for ELF targets. Defaults to true.
-  bool HasSingleParameterDotFile;
+  bool HasSingleParameterDotFile = true;

   /// True if the target has a .ident directive, this is true for ELF targets.
   /// Defaults to false.
-  bool HasIdentDirective;
+  bool HasIdentDirective = false;

   /// True if this target supports the MachO .no_dead_strip directive. Defaults
   /// to false.
-  bool HasNoDeadStrip;
+  bool HasNoDeadStrip = false;

   /// True if this target supports the MachO .alt_entry directive. Defaults to
   /// false.
-  bool HasAltEntry;
+  bool HasAltEntry = false;

   /// Used to declare a global as being a weak symbol. Defaults to ".weak".
   const char *WeakDirective;

   /// This directive, if non-null, is used to declare a global as being a weak
-  /// undefined symbol. Defaults to NULL.
-  const char *WeakRefDirective;
+  /// undefined symbol. Defaults to nullptr.
+  const char *WeakRefDirective = nullptr;

   /// True if we have a directive to declare a global as being a weak defined
   /// symbol. Defaults to false.
-  bool HasWeakDefDirective;
+  bool HasWeakDefDirective = false;

   /// True if we have a directive to declare a global as being a weak defined
   /// symbol that can be hidden (unexported). Defaults to false.
-  bool HasWeakDefCanBeHiddenDirective;
+  bool HasWeakDefCanBeHiddenDirective = false;

   /// True if we have a .linkonce directive. This is used on cygwin/mingw.
   /// Defaults to false.
-  bool HasLinkOnceDirective;
+  bool HasLinkOnceDirective = false;

   /// This attribute, if not MCSA_Invalid, is used to declare a symbol as having
   /// hidden visibility. Defaults to MCSA_Hidden.
-  MCSymbolAttr HiddenVisibilityAttr;
+  MCSymbolAttr HiddenVisibilityAttr = MCSA_Hidden;

   /// This attribute, if not MCSA_Invalid, is used to declare an undefined
   /// symbol as having hidden visibility. Defaults to MCSA_Hidden.
-  MCSymbolAttr HiddenDeclarationVisibilityAttr;
+  MCSymbolAttr HiddenDeclarationVisibilityAttr = MCSA_Hidden;

   /// This attribute, if not MCSA_Invalid, is used to declare a symbol as having
   /// protected visibility. Defaults to MCSA_Protected
-  MCSymbolAttr ProtectedVisibilityAttr;
+  MCSymbolAttr ProtectedVisibilityAttr = MCSA_Protected;

   //===--- Dwarf Emission Directives -----------------------------------===//

   /// True if target supports emission of debugging information. Defaults to
   /// false.
-  bool SupportsDebugInformation;
+  bool SupportsDebugInformation = false;

   /// Exception handling format for the target. Defaults to None.
-  ExceptionHandling ExceptionsType;
+  ExceptionHandling ExceptionsType = ExceptionHandling::None;

   /// Windows exception handling data (.pdata) encoding. Defaults to Invalid.
-  WinEH::EncodingType WinEHEncodingType;
+  WinEH::EncodingType WinEHEncodingType = WinEH::EncodingType::Invalid;

   /// True if Dwarf2 output generally uses relocations for references to other
   /// .debug_* sections.
-  bool DwarfUsesRelocationsAcrossSections;
+  bool DwarfUsesRelocationsAcrossSections = true;

   /// True if DWARF FDE symbol reference relocations should be replaced by an
   /// absolute difference.
-  bool DwarfFDESymbolsUseAbsDiff;
+  bool DwarfFDESymbolsUseAbsDiff = false;

   /// True if dwarf register numbers are printed instead of symbolic register
   /// names in .cfi_* directives. Defaults to false.
-  bool DwarfRegNumForCFI;
+  bool DwarfRegNumForCFI = false;

   /// True if target uses parens to indicate the symbol variant instead of @.
   /// For example, foo(plt) instead of foo@plt. Defaults to false.
-  bool UseParensForSymbolVariant;
+  bool UseParensForSymbolVariant = false;

   //===--- Prologue State ----------------------------------------------===//

@@ -363,11 +368,11 @@ class MCAsmInfo {
   bool PreserveAsmComments;

   /// Compress DWARF debug sections. Defaults to no compression.
-  DebugCompressionType CompressDebugSections;
+  DebugCompressionType CompressDebugSections = DebugCompressionType::DCT_None;

   /// True if the integrated assembler should interpret 'a >> b' constant
   /// expressions as logical rather than arithmetic.
-  bool UseLogicalShr;
+  bool UseLogicalShr = true;

   // If true, emit GOTPCRELX/REX_GOTPCRELX instead of GOTPCREL, on
   // X86_64 ELF.
@@ -478,14 +483,17 @@ class MCAsmInfo {
   bool needsLocalForSize() const { return NeedsLocalForSize; }
   StringRef getPrivateGlobalPrefix() const { return PrivateGlobalPrefix; }
   StringRef getPrivateLabelPrefix() const { return PrivateLabelPrefix; }
+
   bool hasLinkerPrivateGlobalPrefix() const {
     return LinkerPrivateGlobalPrefix[0] != '\0';
   }
+
   StringRef getLinkerPrivateGlobalPrefix() const {
     if (hasLinkerPrivateGlobalPrefix())
       return LinkerPrivateGlobalPrefix;
     return getPrivateGlobalPrefix();
   }
+
   const char *getInlineAsmStart() const { return InlineAsmStart; }
   const char *getInlineAsmEnd() const { return InlineAsmEnd; }
   const char *getCode16Directive() const { return Code16Directive; }
@@ -495,25 +503,32 @@ class MCAsmInfo {
   unsigned getAssemblerDialect() const { return AssemblerDialect; }
   bool doesAllowAtInName() const { return AllowAtInName; }
   bool supportsNameQuoting() const { return SupportsQuotedNames; }
+
   bool doesSupportDataRegionDirectives() const {
     return UseDataRegionDirectives;
   }
+
   const char *getZeroDirective() const { return ZeroDirective; }
   const char *getAsciiDirective() const { return AsciiDirective; }
   const char *getAscizDirective() const { return AscizDirective; }
   bool getAlignmentIsInBytes() const { return AlignmentIsInBytes; }
   unsigned getTextAlignFillValue() const { return TextAlignFillValue; }
   const char *getGlobalDirective() const { return GlobalDirective; }
+
   bool doesSetDirectiveSuppressReloc() const {
     return SetDirectiveSuppressesReloc;
   }
+
   bool hasAggressiveSymbolFolding() const { return HasAggressiveSymbolFolding; }
+
   bool getCOMMDirectiveAlignmentIsInBytes() const {
     return COMMDirectiveAlignmentIsInBytes;
   }
+
   LCOMM::LCOMMType getLCOMMDirectiveAlignmentType() const {
     return LCOMMDirectiveAlignmentType;
   }
+
   bool hasFunctionAlignment() const { return HasFunctionAlignment; }
   bool hasDotTypeDotSizeDirective() const { return HasDotTypeDotSizeDirective; }
   bool hasSingleParameterDotFile() const { return HasSingleParameterDotFile; }
@@ -523,22 +538,29 @@ class MCAsmInfo {
   const char *getWeakDirective() const { return WeakDirective; }
   const char *getWeakRefDirective() const { return WeakRefDirective; }
   bool hasWeakDefDirective() const { return HasWeakDefDirective; }
+
   bool hasWeakDefCanBeHiddenDirective() const {
     return HasWeakDefCanBeHiddenDirective;
   }
+
   bool hasLinkOnceDirective() const { return HasLinkOnceDirective; }

   MCSymbolAttr getHiddenVisibilityAttr() const { return HiddenVisibilityAttr; }
+
   MCSymbolAttr getHiddenDeclarationVisibilityAttr() const {
     return HiddenDeclarationVisibilityAttr;
   }
+
   MCSymbolAttr getProtectedVisibilityAttr() const {
     return ProtectedVisibilityAttr;
   }
+
   bool doesSupportDebugInformation() const { return SupportsDebugInformation; }
+
   bool doesSupportExceptionHandling() const {
     return ExceptionsType != ExceptionHandling::None;
   }
+
   ExceptionHandling getExceptionHandlingType() const { return ExceptionsType; }
   WinEH::EncodingType getWinEHEncodingType() const { return WinEHEncodingType; }

@@ -562,6 +584,7 @@ class MCAsmInfo {
   bool doesDwarfUseRelocationsAcrossSections() const {
     return DwarfUsesRelocationsAcrossSections;
   }
+
   bool doDwarfFDESymbolsUseAbsDiff() const { return DwarfFDESymbolsUseAbsDiff; }
   bool useDwarfRegNumForCFI() const { return DwarfRegNumForCFI; }
   bool useParensForSymbolVariant() const { return UseParensForSymbolVariant; }
@@ -604,6 +627,7 @@ class MCAsmInfo {
   void setRelaxELFRelocations(bool V) { RelaxELFRelocations = V; }
   bool hasMipsExpressions() const { return HasMipsExpressions; }
 };
-}

-#endif
+} // end namespace llvm
+
+#endif // LLVM_MC_MCASMINFO_H
diff --git a/include/llvm/Support/ELF.h b/include/llvm/Support/ELF.h
index 9ca1dbf..c344c0e 100644
--- a/include/llvm/Support/ELF.h
+++ b/include/llvm/Support/ELF.h
@@ -560,6 +560,7 @@ enum {
   EF_HEXAGON_MACH_V5  = 0x00000004,   // Hexagon V5
   EF_HEXAGON_MACH_V55 = 0x00000005,   // Hexagon V55
   EF_HEXAGON_MACH_V60 = 0x00000060,   // Hexagon V60
+  EF_HEXAGON_MACH_V62 = 0x00000062,   // Hexagon V62

   // Highest ISA version flags
   EF_HEXAGON_ISA_MACH = 0x00000000,   // Same as specified in bits[11:0]
@@ -570,6 +571,7 @@ enum {
   EF_HEXAGON_ISA_V5   = 0x00000040,   // Hexagon V5 ISA
   EF_HEXAGON_ISA_V55  = 0x00000050,   // Hexagon V55 ISA
   EF_HEXAGON_ISA_V60  = 0x00000060,   // Hexagon V60 ISA
+  EF_HEXAGON_ISA_V62  = 0x00000062,   // Hexagon V62 ISA
 };

 // Hexagon-specific section indexes for common small data
@@ -707,6 +709,7 @@ enum : unsigned {

   SHT_MIPS_REGINFO  = 0x70000006, // Register usage information
   SHT_MIPS_OPTIONS  = 0x7000000d, // General options
+  SHT_MIPS_DWARF    = 0x7000001e, // DWARF debugging section.
   SHT_MIPS_ABIFLAGS = 0x7000002a, // ABI information.

   SHT_HIPROC = 0x7fffffff, // Highest processor arch-specific type.
@@ -755,21 +758,21 @@ enum : unsigned {

   // Start of target-specific flags.

-  /// XCORE_SHF_CP_SECTION - All sections with the "c" flag are grouped
-  /// together by the linker to form the constant pool and the cp register is
-  /// set to the start of the constant pool by the boot code.
-  XCORE_SHF_CP_SECTION = 0x800U,
-
-  /// XCORE_SHF_DP_SECTION - All sections with the "d" flag are grouped
-  /// together by the linker to form the data section and the dp register is
-  /// set to the start of the section by the boot code.
-  XCORE_SHF_DP_SECTION = 0x1000U,
-
   SHF_MASKOS = 0x0ff00000,

   // Bits indicating processor-specific flags.
   SHF_MASKPROC = 0xf0000000,

+  /// All sections with the "d" flag are grouped together by the linker to form
+  /// the data section and the dp register is set to the start of the section by
+  /// the boot code.
+  XCORE_SHF_DP_SECTION = 0x10000000,
+
+  /// All sections with the "c" flag are grouped together by the linker to form
+  /// the constant pool and the cp register is set to the start of the constant
+  /// pool by the boot code.
+ XCORE_SHF_CP_SECTION = 0x20000000, + // If an object file section does not have this flag set, then it may not hold // more than 2GB and can be freely referred to in objects using smaller code // models. Otherwise, only objects using larger code models can refer to them. @@ -1147,6 +1150,11 @@ enum { DT_VERNEED = 0X6FFFFFFE, // The address of the version Dependency table. DT_VERNEEDNUM = 0X6FFFFFFF, // The number of entries in DT_VERNEED. + // Hexagon specific dynamic table entries + DT_HEXAGON_SYMSZ = 0x70000000, + DT_HEXAGON_VER = 0x70000001, + DT_HEXAGON_PLT = 0x70000002, + // Mips specific dynamic table entry tags. DT_MIPS_RLD_VERSION = 0x70000001, // 32 bit version number for runtime // linker interface. @@ -1310,6 +1318,19 @@ enum { enum { VER_NEED_NONE = 0, VER_NEED_CURRENT = 1 }; // SHT_NOTE section types +enum { + NT_FREEBSD_THRMISC = 7, + NT_FREEBSD_PROCSTAT_PROC = 8, + NT_FREEBSD_PROCSTAT_FILES = 9, + NT_FREEBSD_PROCSTAT_VMMAP = 10, + NT_FREEBSD_PROCSTAT_GROUPS = 11, + NT_FREEBSD_PROCSTAT_UMASK = 12, + NT_FREEBSD_PROCSTAT_RLIMIT = 13, + NT_FREEBSD_PROCSTAT_OSREL = 14, + NT_FREEBSD_PROCSTAT_PSSTRINGS = 15, + NT_FREEBSD_PROCSTAT_AUXV = 16, +}; + enum { NT_GNU_ABI_TAG = 1, NT_GNU_HWCAP = 2, diff --git a/include/llvm/Support/MathExtras.h b/include/llvm/Support/MathExtras.h index c38d037..31b6154 100644 --- a/include/llvm/Support/MathExtras.h +++ b/include/llvm/Support/MathExtras.h @@ -547,23 +547,19 @@ inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) { /// BitsToDouble - This function takes a 64-bit integer and returns the bit /// equivalent double. inline double BitsToDouble(uint64_t Bits) { - union { - uint64_t L; - double D; - } T; - T.L = Bits; - return T.D; + double D; + static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes"); + memcpy(&D, &Bits, sizeof(Bits)); + return D; } /// BitsToFloat - This function takes a 32-bit integer and returns the bit /// equivalent float. inline float BitsToFloat(uint32_t Bits) { - union { - uint32_t I; - float F; - } T; - T.I = Bits; - return T.F; + float F; + static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes"); + memcpy(&F, &Bits, sizeof(Bits)); + return F; } /// DoubleToBits - This function takes a double and returns the bit @@ -571,12 +567,10 @@ inline float BitsToFloat(uint32_t Bits) { /// changes the bits of NaNs on some hosts, notably x86, so this /// routine cannot be used if these bits are needed. inline uint64_t DoubleToBits(double Double) { - union { - uint64_t L; - double D; - } T; - T.D = Double; - return T.L; + uint64_t Bits; + static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes"); + memcpy(&Bits, &Double, sizeof(Double)); + return Bits; } /// FloatToBits - This function takes a float and returns the bit @@ -584,12 +578,10 @@ inline uint64_t DoubleToBits(double Double) { /// changes the bits of NaNs on some hosts, notably x86, so this /// routine cannot be used if these bits are needed. inline uint32_t FloatToBits(float Float) { - union { - uint32_t I; - float F; - } T; - T.F = Float; - return T.I; + uint32_t Bits; + static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes"); + memcpy(&Bits, &Float, sizeof(Float)); + return Bits; } /// MinAlign - A and B are either alignments or offsets. 
Return the minimum diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 317582c..25d3664 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -23,66 +23,80 @@ #ifndef LLVM_TARGET_TARGETLOWERING_H #define LLVM_TARGET_TARGETLOWERING_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/DAGCombine.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetCallingConv.h" #include "llvm/Target/TargetMachine.h" +#include +#include #include +#include +#include #include +#include +#include #include namespace llvm { - class BranchProbability; - class CallInst; - class CCState; - class CCValAssign; - class FastISel; - class FunctionLoweringInfo; - class ImmutableCallSite; - class IntrinsicInst; - class MachineBasicBlock; - class MachineFunction; - class MachineInstr; - class MachineJumpTableInfo; - class MachineLoop; - class MachineRegisterInfo; - class Mangler; - class MCContext; - class MCExpr; - class MCSymbol; - template class SmallVectorImpl; - class DataLayout; - class TargetRegisterClass; - class TargetLibraryInfo; - class TargetLoweringObjectFile; - class Value; - - namespace Sched { - enum Preference { - None, // No preference - Source, // Follow source order. - RegPressure, // Scheduling for lowest register pressure. - Hybrid, // Scheduling for both latency and register pressure. - ILP, // Scheduling for ILP in low register pressure mode. - VLIW // Scheduling for VLIW targets. - }; - } + +class BranchProbability; +class CCState; +class CCValAssign; +class FastISel; +class FunctionLoweringInfo; +class IntrinsicInst; +class MachineBasicBlock; +class MachineFunction; +class MachineInstr; +class MachineJumpTableInfo; +class MachineLoop; +class MachineRegisterInfo; +class MCContext; +class MCExpr; +class TargetRegisterClass; +class TargetLibraryInfo; +class TargetRegisterInfo; +class Value; + +namespace Sched { + + enum Preference { + None, // No preference + Source, // Follow source order. + RegPressure, // Scheduling for lowest register pressure. + Hybrid, // Scheduling for both latency and register pressure. + ILP, // Scheduling for ILP in low register pressure mode. + VLIW // Scheduling for VLIW targets. + }; + +} // end namespace Sched /// This base class for TargetLowering contains the SelectionDAG-independent /// parts that can be used from the rest of CodeGen. class TargetLoweringBase { - TargetLoweringBase(const TargetLoweringBase&) = delete; - void operator=(const TargetLoweringBase&) = delete; - public: /// This enum indicates whether operations are valid for a target, and if not, /// what action should be used to make them valid. @@ -166,7 +180,9 @@ class TargetLoweringBase { /// NOTE: The TargetMachine owns TLOF. 
explicit TargetLoweringBase(const TargetMachine &TM); - virtual ~TargetLoweringBase() {} + TargetLoweringBase(const TargetLoweringBase&) = delete; + void operator=(const TargetLoweringBase&) = delete; + virtual ~TargetLoweringBase() = default; protected: /// \brief Initialize all of the actions to default values. @@ -372,23 +388,22 @@ class TargetLoweringBase { /// \brief Return true if it is cheaper to split the store of a merged int val /// from a pair of smaller values into multiple stores. - virtual bool isMultiStoresCheaperThanBitsMerge(SDValue Lo, SDValue Hi) const { + virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const { return false; } /// \brief Return if the target supports combining a /// chain like: /// \code - /// %andResult = and %val1, #imm-with-one-bit-set; + /// %andResult = and %val1, #mask /// %icmpResult = icmp %andResult, 0 - /// br i1 %icmpResult, label %dest1, label %dest2 /// \endcode /// into a single machine instruction of a form like: /// \code - /// brOnBitSet %register, #bitNumber, dest + /// cc = test %register, #mask /// \endcode - bool isMaskAndBranchFoldingLegal() const { - return MaskAndBranchFoldingIsLegal; + virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const { + return false; } /// Return true if the target should transform: @@ -599,19 +614,18 @@ class TargetLoweringBase { MVT &RegisterVT) const; struct IntrinsicInfo { - unsigned opc; // target opcode - EVT memVT; // memory VT - const Value* ptrVal; // value representing memory location - int offset; // offset off of ptrVal - unsigned size; // the size of the memory location - // (taken from memVT if zero) - unsigned align; // alignment - bool vol; // is volatile? - bool readMem; // reads memory? - bool writeMem; // writes memory? - - IntrinsicInfo() : opc(0), ptrVal(nullptr), offset(0), size(0), align(1), - vol(false), readMem(false), writeMem(false) {} + unsigned opc = 0; // target opcode + EVT memVT; // memory VT + const Value* ptrVal = nullptr; // value representing memory location + int offset = 0; // offset off of ptrVal + unsigned size = 0; // the size of the memory location + // (taken from memVT if zero) + unsigned align = 1; // alignment + bool vol = false; // is volatile? + bool readMem = false; // reads memory? + bool writeMem = false; // writes memory? + + IntrinsicInfo() = default; }; /// Given an intrinsic, checks if on the target the intrinsic will need to map @@ -823,7 +837,6 @@ class TargetLoweringBase { getCondCodeAction(CC, VT) == Custom; } - /// If the action for this operation is to promote, this method returns the /// ValueType to promote to. MVT getTypeToPromoteTo(unsigned Op, MVT VT) const { @@ -975,6 +988,11 @@ class TargetLoweringBase { return GatherAllAliasesMaxDepth; } + /// Returns the size of the platform's va_list object. + virtual unsigned getVaListSizeInBits(const DataLayout &DL) const { + return getPointerTy(DL).getSizeInBits(); + } + /// \brief Get maximum # of store operations permitted for llvm.memset /// /// This function returns the maximum number of store operations permitted @@ -1372,6 +1390,13 @@ class TargetLoweringBase { Action != TypeSplitVector; } + /// Return true if a select of constants (select Cond, C1, C2) should be + /// transformed into simple math ops with the condition value. 
For example: + /// select Cond, C1, C1-1 --> add (zext Cond), C1-1 + virtual bool convertSelectOfConstantsToMath() const { + return false; + } + //===--------------------------------------------------------------------===// // TargetLowering Configuration Methods - These methods should be invoked by // the derived class constructor to configure this object for the target. @@ -1478,7 +1503,8 @@ class TargetLoweringBase { void computeRegisterProperties(const TargetRegisterInfo *TRI); /// Indicate that the specified operation does not work with the specified - /// type and indicate what to do about it. + /// type and indicate what to do about it. Note that VT may refer to either + /// the type of a result or that of an operand of Op. void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action) { assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!"); @@ -1630,10 +1656,9 @@ class TargetLoweringBase { /// possible to be done in the address mode for that operand. This hook lets /// targets also pass back when this should be done on intrinsics which /// load/store. - virtual bool GetAddrModeArguments(IntrinsicInst * /*I*/, + virtual bool getAddrModeArguments(IntrinsicInst * /*I*/, SmallVectorImpl &/*Ops*/, - Type *&/*AccessTy*/, - unsigned AddrSpace = 0) const { + Type *&/*AccessTy*/) const { return false; } @@ -1645,11 +1670,11 @@ class TargetLoweringBase { /// If Scale is zero, there is no ScaleReg. Scale of 1 indicates a reg with /// no scale. struct AddrMode { - GlobalValue *BaseGV; - int64_t BaseOffs; - bool HasBaseReg; - int64_t Scale; - AddrMode() : BaseGV(nullptr), BaseOffs(0), HasBaseReg(false), Scale(0) {} + GlobalValue *BaseGV = nullptr; + int64_t BaseOffs = 0; + bool HasBaseReg = false; + int64_t Scale = 0; + AddrMode() = default; }; /// Return true if the addressing mode represented by AM is legal for this @@ -2096,8 +2121,6 @@ class TargetLoweringBase { private: LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const; -private: - /// Targets can specify ISD nodes that they would like PerformDAGCombine /// callbacks for by calling setTargetDAGCombine(), which sets a bit in this /// array. @@ -2188,14 +2211,9 @@ class TargetLoweringBase { /// the branch is usually predicted right. bool PredictableSelectIsExpensive; - /// MaskAndBranchFoldingIsLegal - Indicates if the target supports folding - /// a mask of a single bit, a compare, and a branch into a single instruction. - bool MaskAndBranchFoldingIsLegal; - /// \see enableExtLdPromotion. bool EnableExtLdPromotion; -protected: /// Return true if the value types that can be represented by the specified /// register class are all legal. bool isLegalRC(const TargetRegisterClass *RC) const; @@ -2212,12 +2230,12 @@ class TargetLoweringBase { /// This class also defines callbacks that targets must implement to lower /// target-specific constructs to SelectionDAG operators. class TargetLowering : public TargetLoweringBase { - TargetLowering(const TargetLowering&) = delete; - void operator=(const TargetLowering&) = delete; - public: struct DAGCombinerInfo; + TargetLowering(const TargetLowering&) = delete; + void operator=(const TargetLowering&) = delete; + /// NOTE: The TargetMachine owns TLOF. explicit TargetLowering(const TargetMachine &TM); @@ -2349,11 +2367,11 @@ class TargetLowering : public TargetLoweringBase { /// expression and return a mask of KnownOne and KnownZero bits for the /// expression (used to simplify the caller). 
The KnownZero/One bits may only /// be accurate for those bits in the DemandedMask. - /// \p AssumeSingleUse When this paramater is true, this function will + /// \p AssumeSingleUse When this parameter is true, this function will /// attempt to simplify \p Op even if there are multiple uses. /// Callers are responsible for correctly updating the DAG based on the /// results of this function, because simply replacing TLO.Old - /// with TLO.New will be incorrect when this paramater is true and TLO.Old + /// with TLO.New will be incorrect when this parameter is true and TLO.Old /// has multiple uses. bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask, APInt &KnownZero, APInt &KnownOne, @@ -2379,6 +2397,7 @@ class TargetLowering : public TargetLoweringBase { void *DC; // The DAG Combiner object. CombineLevel Level; bool CalledByLegalizer; + public: SelectionDAG &DAG; @@ -2550,7 +2569,7 @@ class TargetLowering : public TargetLoweringBase { ArgListEntry() : isSExt(false), isZExt(false), isInReg(false), isSRet(false), isNest(false), isByVal(false), isInAlloca(false), isReturned(false), isSwiftSelf(false), isSwiftError(false), - Alignment(0) { } + Alignment(0) {} void setAttributes(ImmutableCallSite *CS, unsigned AttrIdx); }; @@ -2689,7 +2708,6 @@ class TargetLowering : public TargetLoweringBase { ArgListTy &getArgs() { return Args; } - }; /// This function lowers an abstract call to a function into an actual call. @@ -3126,6 +3144,13 @@ class TargetLowering : public TargetLoweringBase { EVT DataVT, SelectionDAG &DAG, bool IsCompressedMemory) const; + /// Get a pointer to vector element \p Idx located in memory for a vector of + /// type \p VecVT starting at a base address of \p VecPtr. If \p Idx is out of + /// bounds the returned pointer is unspecified, but will be within the vector + /// bounds.
+ SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, + SDValue Idx) const; + //===--------------------------------------------------------------------===// // Instruction Emitting Hooks // @@ -3177,6 +3202,6 @@ void GetReturnInfo(Type *ReturnType, AttributeSet attr, SmallVectorImpl &Outs, const TargetLowering &TLI, const DataLayout &DL); -} // end llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_TARGET_TARGETLOWERING_H diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index 0ae57ce..bd11132 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -15,8 +15,12 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ProfileSummaryInfo.h" @@ -51,8 +55,10 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/BypassSlowDivision.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SimplifyLibCalls.h" +#include "llvm/Transforms/Utils/ValueMapper.h" using namespace llvm; using namespace llvm::PatternMatch; @@ -75,7 +81,6 @@ STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized"); STATISTIC(NumRetsDup, "Number of return instructions duplicated"); STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved"); STATISTIC(NumSelectsExpanded, "Number of selects turned into branches"); -STATISTIC(NumAndCmpsMoved, "Number of and/cmp's pushed into branches"); STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed"); static cl::opt DisableBranchOpts( @@ -124,6 +129,15 @@ static cl::opt ProfileGuidedSectionPrefix( "profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use profile info to add section prefix for hot/cold functions")); +static cl::opt FreqRatioToSkipMerge( + "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2), + cl::desc("Skip merging empty blocks if (frequency of empty block) / " + "(frequency of destination block) is greater than this ratio")); + +static cl::opt ForceSplitStore( + "force-split-store", cl::Hidden, cl::init(false), + cl::desc("Force store splitting no matter what the target query says.")); + namespace { typedef SmallPtrSet SetOfInstrs; typedef PointerIntPair TypeIsSExt; @@ -132,10 +146,14 @@ class TypePromotionTransaction; class CodeGenPrepare : public FunctionPass { const TargetMachine *TM; + const TargetSubtargetInfo *SubtargetInfo; const TargetLowering *TLI; + const TargetRegisterInfo *TRI; const TargetTransformInfo *TTI; const TargetLibraryInfo *TLInfo; const LoopInfo *LI; + std::unique_ptr BFI; + std::unique_ptr BPI; /// As we scan instructions optimizing them, this is the next instruction /// to optimize. Transforms that can invalidate this should update it. 
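The TargetLowering.h hunks above introduce several opt-in hooks with conservative defaults: isMultiStoresCheaperThanBitsMerge, isMaskAndCmp0FoldingBeneficial, convertSelectOfConstantsToMath, and getVaListSizeInBits. As a rough sketch of how a backend would opt in, here is a hypothetical override block; the class name and the specific policies are invented for illustration, and only the hook signatures come from this patch:

#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Target/TargetLowering.h"

namespace llvm {

// Hypothetical backend: every policy below is an assumption made up for the
// example, not something this patch prescribes for any real target.
class ExampleTargetLowering : public TargetLowering {
public:
  explicit ExampleTargetLowering(const TargetMachine &TM)
      : TargetLowering(TM) {}

  // Pretend two half-width stores beat the zext/shl/or merge whenever both
  // halves are scalar integers; a real target would consult its store units.
  bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
    return LTy.isInteger() && !LTy.isVector() &&
           HTy.isInteger() && !HTy.isVector();
  }

  // Pretend the target has a test-under-mask instruction, so CodeGenPrepare
  // should sink (and X, imm) next to each icmp-eq-0 user.
  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override {
    return isa<ConstantInt>(AndI.getOperand(1));
  }

  // Allow DAGCombiner to turn select(Cond, C1, C2) into math on zext(Cond).
  bool convertSelectOfConstantsToMath() const override { return true; }

  // Pretend va_list is a two-pointer structure on this target.
  unsigned getVaListSizeInBits(const DataLayout &DL) const override {
    return 2 * getPointerTy(DL).getSizeInBits();
  }
};

} // end namespace llvm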
@@ -182,8 +200,11 @@ class TypePromotionTransaction; private: bool eliminateFallThrough(Function &F); bool eliminateMostlyEmptyBlocks(Function &F); + BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB); bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; void eliminateMostlyEmptyBlock(BasicBlock *BB); + bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB, + bool isPreheader); bool optimizeBlock(BasicBlock &BB, bool& ModifiedDT); bool optimizeInst(Instruction *I, bool& ModifiedDT); bool optimizeMemoryInst(Instruction *I, Value *Addr, @@ -199,14 +220,13 @@ class TypePromotionTransaction; bool optimizeExtractElementInst(Instruction *Inst); bool dupRetToEnableTailCallOpts(BasicBlock *BB); bool placeDbgValues(Function &F); - bool sinkAndCmp(Function &F); bool extLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI, Instruction *&Inst, const SmallVectorImpl &Exts, unsigned CreatedInstCost); bool splitBranchCondition(Function &F); bool simplifyOffsetableRelocate(Instruction &I); - void stripInvariantGroupMetadata(Instruction &I); + bool splitIndirectCriticalEdges(Function &F); }; } @@ -231,10 +251,15 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // Clear per function information. InsertedInsts.clear(); PromotedInsts.clear(); + BFI.reset(); + BPI.reset(); ModifiedDT = false; - if (TM) - TLI = TM->getSubtargetImpl(F)->getTargetLowering(); + if (TM) { + SubtargetInfo = TM->getSubtargetImpl(F); + TLI = SubtargetInfo->getTargetLowering(); + TRI = SubtargetInfo->getRegisterInfo(); + } TLInfo = &getAnalysis().getTLI(); TTI = &getAnalysis().getTTI(F); LI = &getAnalysis().getLoopInfo(); @@ -273,14 +298,12 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // find a node corresponding to the value. EverMadeChange |= placeDbgValues(F); - // If there is a mask, compare against zero, and branch that can be combined - // into a single target instruction, push the mask and compare into branch - // users. Do this before OptimizeBlock -> OptimizeInst -> - // OptimizeCmpExpression, which perturbs the pattern being searched for. - if (!DisableBranchOpts) { - EverMadeChange |= sinkAndCmp(F); + if (!DisableBranchOpts) EverMadeChange |= splitBranchCondition(F); - } + + // Split some critical edges where one of the sources is an indirect branch, + // to help generate sane code for PHIs involving such edges. + EverMadeChange |= splitIndirectCriticalEdges(F); bool MadeChange = true; while (MadeChange) { @@ -383,6 +406,186 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F) { return Changed; } +/// Find a destination block from BB if BB is mergeable empty block. +BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) { + // If this block doesn't end with an uncond branch, ignore it. + BranchInst *BI = dyn_cast(BB->getTerminator()); + if (!BI || !BI->isUnconditional()) + return nullptr; + + // If the instruction before the branch (skipping debug info) isn't a phi + // node, then other stuff is happening here. + BasicBlock::iterator BBI = BI->getIterator(); + if (BBI != BB->begin()) { + --BBI; + while (isa(BBI)) { + if (BBI == BB->begin()) + break; + --BBI; + } + if (!isa(BBI) && !isa(BBI)) + return nullptr; + } + + // Do not break infinite loops. + BasicBlock *DestBB = BI->getSuccessor(0); + if (DestBB == BB) + return nullptr; + + if (!canMergeBlocks(BB, DestBB)) + DestBB = nullptr; + + return DestBB; +} + +// Return the unique indirectbr predecessor of a block. 
This may return null +// even if such a predecessor exists, if it's not useful for splitting. +// If a predecessor is found, OtherPreds will contain all other (non-indirectbr) +// predecessors of BB. +static BasicBlock * +findIBRPredecessor(BasicBlock *BB, SmallVectorImpl<BasicBlock *> &OtherPreds) { + // If the block doesn't have any PHIs, we don't care about it, since there's + // no point in splitting it. + PHINode *PN = dyn_cast<PHINode>(BB->begin()); + if (!PN) + return nullptr; + + // Verify we have exactly one IBR predecessor. + // Conservatively bail out if one of the other predecessors is not a "regular" + // terminator (that is, not a switch or a br). + BasicBlock *IBB = nullptr; + for (unsigned Pred = 0, E = PN->getNumIncomingValues(); Pred != E; ++Pred) { + BasicBlock *PredBB = PN->getIncomingBlock(Pred); + TerminatorInst *PredTerm = PredBB->getTerminator(); + switch (PredTerm->getOpcode()) { + case Instruction::IndirectBr: + if (IBB) + return nullptr; + IBB = PredBB; + break; + case Instruction::Br: + case Instruction::Switch: + OtherPreds.push_back(PredBB); + continue; + default: + return nullptr; + } + } + + return IBB; +} + +// Split critical edges where the source of the edge is an indirectbr +// instruction. This isn't always possible, but we can handle some easy cases. +// This is useful because MI is unable to split such critical edges, +// which means it will not be able to sink instructions along those edges. +// This is especially painful for indirect branches with many successors, where +// we end up having to prepare all outgoing values in the origin block. +// +// Our normal algorithm for splitting critical edges requires us to update +// the outgoing edges of the edge origin block, but for an indirectbr this +// is hard, since it would require finding and updating the block addresses +// the indirect branch uses. But if a block only has a single indirectbr +// predecessor, with the others being regular branches, we can do it in a +// different way. +// Say we have A -> D, B -> D, I -> D where only I -> D is an indirectbr. +// We can split D into D0 and D1, where D0 contains only the PHIs from D, +// and D1 is the D block body. We can then duplicate D0 as D0A and D0B, and +// create the following structure: +// A -> D0A, B -> D0A, I -> D0B, D0A -> D1, D0B -> D1 +bool CodeGenPrepare::splitIndirectCriticalEdges(Function &F) { + // Check whether the function has any indirectbrs, and collect which blocks + // they may jump to. Since most functions don't have indirect branches, + // this lowers the common case's overhead to O(Blocks) instead of O(Edges). + SmallSetVector<BasicBlock *, 16> Targets; + for (auto &BB : F) { + auto *IBI = dyn_cast<IndirectBrInst>(BB.getTerminator()); + if (!IBI) + continue; + + for (unsigned Succ = 0, E = IBI->getNumSuccessors(); Succ != E; ++Succ) + Targets.insert(IBI->getSuccessor(Succ)); + } + + if (Targets.empty()) + return false; + + bool Changed = false; + for (BasicBlock *Target : Targets) { + SmallVector<BasicBlock *, 4> OtherPreds; + BasicBlock *IBRPred = findIBRPredecessor(Target, OtherPreds); + // If we did not find an indirectbr, or the indirectbr is the only + // incoming edge, this isn't the kind of edge we're looking for. + if (!IBRPred || OtherPreds.empty()) + continue; + + // Don't even think about ehpads/landingpads. + Instruction *FirstNonPHI = Target->getFirstNonPHI(); + if (FirstNonPHI->isEHPad() || Target->isLandingPad()) + continue; + + BasicBlock *BodyBlock = Target->splitBasicBlock(FirstNonPHI, ".split"); + // It's possible Target was its own successor through an indirectbr.
+ // In this case, the indirectbr now comes from BodyBlock. + if (IBRPred == Target) + IBRPred = BodyBlock; + + // At this point Target only has PHIs, and BodyBlock has the rest of the + // block's body. Create a copy of Target that will be used by the "direct" + // preds. + ValueToValueMapTy VMap; + BasicBlock *DirectSucc = CloneBasicBlock(Target, VMap, ".clone", &F); + + for (BasicBlock *Pred : OtherPreds) + Pred->getTerminator()->replaceUsesOfWith(Target, DirectSucc); + + // Ok, now fix up the PHIs. We know the two blocks only have PHIs, and that + // they are clones, so the number of PHIs is the same. + // (a) Remove the edge coming from IBRPred from the "Direct" PHI + // (b) Leave that as the only edge in the "Indirect" PHI. + // (c) Merge the two in the body block. + BasicBlock::iterator Indirect = Target->begin(), + End = Target->getFirstNonPHI()->getIterator(); + BasicBlock::iterator Direct = DirectSucc->begin(); + BasicBlock::iterator MergeInsert = BodyBlock->getFirstInsertionPt(); + + assert(&*End == Target->getTerminator() && + "Block was expected to only contain PHIs"); + + while (Indirect != End) { + PHINode *DirPHI = cast<PHINode>(Direct); + PHINode *IndPHI = cast<PHINode>(Indirect); + + // Now, clean up - the direct block shouldn't get the indirect value, + // and vice versa. + DirPHI->removeIncomingValue(IBRPred); + Direct++; + + // Advance the pointer here, to avoid invalidation issues when the old + // PHI is erased. + Indirect++; + + PHINode *NewIndPHI = PHINode::Create(IndPHI->getType(), 1, "ind", IndPHI); + NewIndPHI->addIncoming(IndPHI->getIncomingValueForBlock(IBRPred), + IBRPred); + + // Create a PHI in the body block, to merge the direct and indirect + // predecessors. + PHINode *MergePHI = + PHINode::Create(IndPHI->getType(), 2, "merge", &*MergeInsert); + MergePHI->addIncoming(NewIndPHI, Target); + MergePHI->addIncoming(DirPHI, DirectSucc); + + IndPHI->replaceAllUsesWith(MergePHI); + IndPHI->eraseFromParent(); + } + + Changed = true; + } + + return Changed; +} + /// Eliminate blocks that contain only PHI nodes, debug info directives, and an /// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split /// edges in ways that are non-optimal for isel. Start by eliminating these @@ -401,46 +604,106 @@ bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) { // Note that this intentionally skips the entry block. for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { BasicBlock *BB = &*I++; + BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB); + if (!DestBB || + !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB))) + continue; + + eliminateMostlyEmptyBlock(BB); + MadeChange = true; + } + return MadeChange; +} + +bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB, + BasicBlock *DestBB, + bool isPreheader) { + // Do not delete loop preheaders if doing so would create a critical edge. + // Loop preheaders can be good locations to spill registers. If the + // preheader is deleted and we create a critical edge, registers may be + // spilled in the loop body instead. + if (!DisablePreheaderProtect && isPreheader && + !(BB->getSinglePredecessor() && + BB->getSinglePredecessor()->getSingleSuccessor())) + return false; + + // Try to skip merging if the unique predecessor of BB is terminated by a + // switch or indirect branch instruction, and BB is used as an incoming block + // of PHIs in DestBB.
In such a case, merging BB and DestBB would cause ISel to + // add COPY instructions in the predecessor of BB instead of BB (if it is not + // merged). Note that the critical edge created by merging such blocks won't be + // split in MachineSink because the jump table is not analyzable. By keeping + // such an empty block (BB), ISel will place COPY instructions in BB, not in the + // predecessor of BB. + BasicBlock *Pred = BB->getUniquePredecessor(); + if (!Pred || + !(isa<SwitchInst>(Pred->getTerminator()) || + isa<IndirectBrInst>(Pred->getTerminator()))) + return true; - // If this block doesn't end with an uncond branch, ignore it. - BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); - if (!BI || !BI->isUnconditional()) + if (BB->getTerminator() != BB->getFirstNonPHI()) + return true; + + // We use a simple cost heuristic which determines that skipping merging is + // profitable if the cost of skipping merging is less than the cost of + // merging: Cost(skipping merging) < Cost(merging BB), where the + // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and + // the Cost(merging BB) is Freq(Pred) * Cost(Copy). + // Assuming Cost(Copy) == Cost(Branch), we could simplify it to: + // Freq(Pred) / Freq(BB) > 2. + // Note that if there are multiple empty blocks sharing the same incoming + // value for the PHIs in the DestBB, we consider them together. In such a + // case, Cost(merging BB) will be the sum of their frequencies. + + if (!isa<PHINode>(DestBB->begin())) + return true; + + SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs; + + // Find all other incoming blocks from which incoming values of all PHIs in + // DestBB are the same as the ones from BB. + for (pred_iterator PI = pred_begin(DestBB), E = pred_end(DestBB); PI != E; + ++PI) { + BasicBlock *DestBBPred = *PI; + if (DestBBPred == BB) continue; - // If the instruction before the branch (skipping debug info) isn't a phi - // node, then other stuff is happening here. - BasicBlock::iterator BBI = BI->getIterator(); - if (BBI != BB->begin()) { - --BBI; - while (isa<DbgInfoIntrinsic>(BBI)) { - if (BBI == BB->begin()) - break; - --BBI; + bool HasAllSameValue = true; + BasicBlock::const_iterator DestBBI = DestBB->begin(); + while (const PHINode *DestPN = dyn_cast<PHINode>(DestBBI++)) { + if (DestPN->getIncomingValueForBlock(BB) != + DestPN->getIncomingValueForBlock(DestBBPred)) { + HasAllSameValue = false; + break; } - if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI)) - continue; } + if (HasAllSameValue) + SameIncomingValueBBs.insert(DestBBPred); + } - // Do not break infinite loops. - BasicBlock *DestBB = BI->getSuccessor(0); - if (DestBB == BB) - continue; + // See if all BB's incoming values are the same as the value from Pred. In this + // case, no reason to skip merging because COPYs are expected to be placed in + // Pred already. + if (SameIncomingValueBBs.count(Pred)) + return true; - if (!canMergeBlocks(BB, DestBB)) - continue; + if (!BFI) { + Function &F = *BB->getParent(); + LoopInfo LI{DominatorTree(F)}; + BPI.reset(new BranchProbabilityInfo(F, LI)); + BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); + } - // Do not delete loop preheaders if doing so would create a critical edge. - // Loop preheaders can be good locations to spill registers. If the - // preheader is deleted and we create a critical edge, registers may be - // spilled in the loop body instead.
- if (!DisablePreheaderProtect && Preheaders.count(BB) && - !(BB->getSinglePredecessor() && BB->getSinglePredecessor()->getSingleSuccessor())) - continue; + BlockFrequency PredFreq = BFI->getBlockFreq(Pred); + BlockFrequency BBFreq = BFI->getBlockFreq(BB); - eliminateMostlyEmptyBlock(BB); - MadeChange = true; - } - return MadeChange; + for (auto SameValueBB : SameIncomingValueBBs) + if (SameValueBB->getUniquePredecessor() == Pred && + DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB)) + BBFreq += BFI->getBlockFreq(SameValueBB); + + return PredFreq.getFrequency() <= + BBFreq.getFrequency() * FreqRatioToSkipMerge; } /// Return true if we can merge BB into DestBB if there is a single @@ -981,6 +1244,83 @@ static bool OptimizeCmpExpression(CmpInst *CI, const TargetLowering *TLI) { return false; } +/// Duplicate and sink the given 'and' instruction into user blocks where it is +/// used in a compare to allow isel to generate better code for targets where +/// this operation can be combined. +/// +/// Return true if any changes are made. +static bool sinkAndCmp0Expression(Instruction *AndI, + const TargetLowering &TLI, + SetOfInstrs &InsertedInsts) { + // Double-check that we're not trying to optimize an instruction that was + // already optimized by some other part of this pass. + assert(!InsertedInsts.count(AndI) && + "Attempting to optimize already optimized and instruction"); + (void) InsertedInsts; + + // Nothing to do for single use in same basic block. + if (AndI->hasOneUse() && + AndI->getParent() == cast(*AndI->user_begin())->getParent()) + return false; + + // Try to avoid cases where sinking/duplicating is likely to increase register + // pressure. + if (!isa(AndI->getOperand(0)) && + !isa(AndI->getOperand(1)) && + AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse()) + return false; + + for (auto *U : AndI->users()) { + Instruction *User = cast(U); + + // Only sink for and mask feeding icmp with 0. + if (!isa(User)) + return false; + + auto *CmpC = dyn_cast(User->getOperand(1)); + if (!CmpC || !CmpC->isZero()) + return false; + } + + if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI)) + return false; + + DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n"); + DEBUG(AndI->getParent()->dump()); + + // Push the 'and' into the same block as the icmp 0. There should only be + // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any + // others, so we don't need to keep track of which BBs we insert into. + for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end(); + UI != E; ) { + Use &TheUse = UI.getUse(); + Instruction *User = cast(*UI); + + // Preincrement use iterator so we don't invalidate it. + ++UI; + + DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n"); + + // Keep the 'and' in the same place if the use is already in the same block. + Instruction *InsertPt = + User->getParent() == AndI->getParent() ? AndI : User; + Instruction *InsertedAnd = + BinaryOperator::Create(Instruction::And, AndI->getOperand(0), + AndI->getOperand(1), "", InsertPt); + // Propagate the debug info. + InsertedAnd->setDebugLoc(AndI->getDebugLoc()); + + // Replace a use of the 'and' with a use of the new 'and'. + TheUse = InsertedAnd; + ++NumAndUses; + DEBUG(User->getParent()->dump()); + } + + // We removed all uses, nuke the and. + AndI->eraseFromParent(); + return true; +} + /// Check if the candidates could be combined with a shift instruction, which /// includes: /// 1. 
Truncate instruction @@ -1843,18 +2183,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) { default: break; case Intrinsic::objectsize: { // Lower all uses of llvm.objectsize.* - uint64_t Size; - Type *ReturnTy = CI->getType(); - Constant *RetVal = nullptr; - ConstantInt *Op1 = cast(II->getArgOperand(1)); - ObjSizeMode Mode = Op1->isZero() ? ObjSizeMode::Max : ObjSizeMode::Min; - if (getObjectSize(II->getArgOperand(0), - Size, *DL, TLInfo, false, Mode)) { - RetVal = ConstantInt::get(ReturnTy, Size); - } else { - RetVal = ConstantInt::get(ReturnTy, - Mode == ObjSizeMode::Min ? 0 : -1ULL); - } + ConstantInt *RetVal = + lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true); // Substituting this can cause recursive simplifications, which can // invalidate our iterator. Use a WeakVH to hold onto it in case this // happens. @@ -1929,16 +2259,15 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) { } if (TLI) { - // Unknown address space. - // TODO: Target hook to pick which address space the intrinsic cares - // about? - unsigned AddrSpace = ~0u; SmallVector PtrOps; Type *AccessTy; - if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy, AddrSpace)) - while (!PtrOps.empty()) - if (optimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy, AddrSpace)) + if (TLI->getAddrModeArguments(II, PtrOps, AccessTy)) + while (!PtrOps.empty()) { + Value *PtrVal = PtrOps.pop_back_val(); + unsigned AS = PtrVal->getType()->getPointerAddressSpace(); + if (optimizeMemoryInst(II, PtrVal, AccessTy, AS)) return true; + } } } @@ -2606,8 +2935,8 @@ void TypePromotionTransaction::rollback( /// This encapsulates the logic for matching the target-legal addressing modes. class AddressingModeMatcher { SmallVectorImpl &AddrModeInsts; - const TargetMachine &TM; const TargetLowering &TLI; + const TargetRegisterInfo &TRI; const DataLayout &DL; /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and @@ -2632,14 +2961,14 @@ class AddressingModeMatcher { bool IgnoreProfitability; AddressingModeMatcher(SmallVectorImpl &AMI, - const TargetMachine &TM, Type *AT, unsigned AS, + const TargetLowering &TLI, + const TargetRegisterInfo &TRI, + Type *AT, unsigned AS, Instruction *MI, ExtAddrMode &AM, const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT) - : AddrModeInsts(AMI), TM(TM), - TLI(*TM.getSubtargetImpl(*MI->getParent()->getParent()) - ->getTargetLowering()), + : AddrModeInsts(AMI), TLI(TLI), TRI(TRI), DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT) { @@ -2657,13 +2986,15 @@ class AddressingModeMatcher { static ExtAddrMode Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst, SmallVectorImpl &AddrModeInsts, - const TargetMachine &TM, + const TargetLowering &TLI, + const TargetRegisterInfo &TRI, const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT) { ExtAddrMode Result; - bool Success = AddressingModeMatcher(AddrModeInsts, TM, AccessTy, AS, + bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, + AccessTy, AS, MemoryInst, Result, InsertedInsts, PromotedInsts, TPT).matchAddr(V, 0); (void)Success; assert(Success && "Couldn't select *anything*?"); @@ -3484,18 +3815,18 @@ bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) { /// Check to see if all uses of OpVal by the specified inline asm call are due /// to memory operands. 
If so, return true, otherwise return false. static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, - const TargetMachine &TM) { + const TargetLowering &TLI, + const TargetRegisterInfo &TRI) { const Function *F = CI->getParent()->getParent(); - const TargetLowering *TLI = TM.getSubtargetImpl(*F)->getTargetLowering(); - const TargetRegisterInfo *TRI = TM.getSubtargetImpl(*F)->getRegisterInfo(); TargetLowering::AsmOperandInfoVector TargetConstraints = - TLI->ParseConstraints(F->getParent()->getDataLayout(), TRI, + TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI, ImmutableCallSite(CI)); + for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; // Compute the constraint code and ConstraintType to use. - TLI->ComputeConstraintToUse(OpInfo, SDValue()); + TLI.ComputeConstraintToUse(OpInfo, SDValue()); // If this asm operand is our Value*, and if it isn't an indirect memory // operand, we can't fold it! @@ -3514,7 +3845,8 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, static bool FindAllMemoryUses( Instruction *I, SmallVectorImpl> &MemoryUses, - SmallPtrSetImpl &ConsideredInsts, const TargetMachine &TM) { + SmallPtrSetImpl &ConsideredInsts, + const TargetLowering &TLI, const TargetRegisterInfo &TRI) { // If we already considered this instruction, we're done. if (!ConsideredInsts.insert(I).second) return false; @@ -3551,12 +3883,12 @@ static bool FindAllMemoryUses( if (!IA) return true; // If this is a memory operand, we're cool, otherwise bail out. - if (!IsOperandAMemoryOperand(CI, IA, I, TM)) + if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI)) return true; continue; } - if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TM)) + if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI)) return true; } @@ -3644,7 +3976,7 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, // the use is just a particularly nice way of sinking it. SmallVector, 16> MemoryUses; SmallPtrSet ConsideredInsts; - if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TM)) + if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI)) return false; // Has a non-memory, non-foldable use! // Now that we know that all uses of this instruction are part of a chain of @@ -3676,7 +4008,8 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode Result; TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); - AddressingModeMatcher Matcher(MatchedAddrModeInsts, TM, AddressAccessTy, AS, + AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, + AddressAccessTy, AS, MemoryInst, Result, InsertedInsts, PromotedInsts, TPT); Matcher.IgnoreProfitability = true; @@ -3770,7 +4103,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // addressing instructions might have. 
SmallVector NewAddrModeInsts; ExtAddrMode NewAddrMode = AddressingModeMatcher::Match( - V, AccessTy, AddrSpace, MemoryInst, NewAddrModeInsts, *TM, + V, AccessTy, AddrSpace, MemoryInst, NewAddrModeInsts, *TLI, *TRI, InsertedInsts, PromotedInsts, TPT); // This check is broken into two cases with very similar code to avoid using @@ -3836,11 +4169,10 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " << *MemoryInst << "\n"); if (SunkAddr->getType() != Addr->getType()) - SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType()); + SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() && TM && - TM->getSubtargetImpl(*MemoryInst->getParent()->getParent()) - ->useAA())) { + SubtargetInfo->useAA())) { // By default, we use the GEP-based method when AA is used later. This // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " @@ -3943,7 +4275,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // We need to add this separately from the scale above to help with // SDAG consecutive load/store merging. if (ResultPtr->getType() != I8PtrTy) - ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); + ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy); ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr"); } @@ -3954,12 +4286,12 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, SunkAddr = ResultPtr; } else { if (ResultPtr->getType() != I8PtrTy) - ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); + ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy); SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr"); } if (SunkAddr->getType() != Addr->getType()) - SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType()); + SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); } } else { DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " @@ -4156,7 +4488,7 @@ static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) { /// %ld = load i32* %addr /// %zext = zext i32 %ld to i64 /// %add = add nuw i64 %zext, 4 -/// \encode +/// \endcode /// Thanks to the promotion, we can match zext(load i32*) to i64. bool CodeGenPrepare::extLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI, Instruction *&Inst, @@ -4198,7 +4530,10 @@ bool CodeGenPrepare::extLdPromotion(TypePromotionTransaction &TPT, // one extension but leave one. However, we optimistically keep going, // because the new extension may be removed too. long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost; - TotalCreatedInstsCost -= ExtCost; + // FIXME: It would be possible to propagate a negative value instead of + // conservatively ceiling it to 0. + TotalCreatedInstsCost = + std::max((long long)0, (TotalCreatedInstsCost - ExtCost)); if (!StressExtLdPromotion && (TotalCreatedInstsCost > 1 || !isPromotedInstructionLegal(*TLI, *DL, PromotedVal))) { @@ -4435,13 +4770,10 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) { !(Load->getType()->isIntegerTy() || Load->getType()->isPointerTy())) return false; - // Skip loads we've already transformed or have no reason to transform. 
- if (Load->hasOneUse()) { - User *LoadUser = *Load->user_begin(); - if (cast<Instruction>(LoadUser)->getParent() == Load->getParent() && - !dyn_cast<PHINode>(LoadUser)) - return false; - } + // Skip loads we've already transformed. + if (Load->hasOneUse() && + InsertedInsts.count(cast<Instruction>(*Load->user_begin()))) + return false; // Look at all uses of Load, looking through phis, to determine how many bits // of the loaded value are needed. @@ -4537,6 +4869,9 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) { IRBuilder<> Builder(Load->getNextNode()); auto *NewAnd = dyn_cast<Instruction>( Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits))); + // Mark this instruction as "inserted by CGP", so that other + // optimizations don't touch it. + InsertedInsts.insert(NewAnd); // Replace all uses of load with new and (except for the use of load in the // new and itself). @@ -5263,6 +5598,117 @@ bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) { return false; } +/// For the instruction sequence of store below, F and I values +/// are bundled together as an i64 value before being stored into memory. +/// Sometimes it is more efficient to generate separate stores for F and I, +/// which can remove the bitwise instructions or sink them to colder places. +/// +/// (store (or (zext (bitcast F to i32) to i64), +/// (shl (zext I to i64), 32)), addr) --> +/// (store F, addr) and (store I, addr+4) +/// +/// Similarly, splitting for other merged stores can also be beneficial, like: +/// For pair of {i32, i32}, i64 store --> two i32 stores. +/// For pair of {i32, i16}, i64 store --> two i32 stores. +/// For pair of {i16, i16}, i32 store --> two i16 stores. +/// For pair of {i16, i8}, i32 store --> two i16 stores. +/// For pair of {i8, i8}, i16 store --> two i8 stores. +/// +/// We allow each target to determine specifically which kind of splitting is +/// supported. +/// +/// The store patterns are commonly seen from the simple code snippet below +/// if only std::make_pair(...) is SROA-transformed before being inlined into hoo. +/// void goo(const std::pair<int, float> &); +/// hoo() { +/// ... +/// goo(std::make_pair(tmp, ftmp)); +/// ... +/// } +/// +/// Although we already have similar splitting in DAG Combine, we duplicate +/// it in CodeGenPrepare to catch the case in which the pattern spans +/// multiple BBs. The logic in DAG Combine is kept to catch the case generated +/// during code expansion. +static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, + const TargetLowering &TLI) { + // Handle simple but common cases only. + Type *StoreType = SI.getValueOperand()->getType(); + if (DL.getTypeStoreSizeInBits(StoreType) != DL.getTypeSizeInBits(StoreType) || + DL.getTypeSizeInBits(StoreType) == 0) + return false; + + unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2; + Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize); + if (DL.getTypeStoreSizeInBits(SplitStoreType) != + DL.getTypeSizeInBits(SplitStoreType)) + return false; + + // Match the following patterns: + // (store (or (zext LValue to i64), + // (shl (zext HValue to i64), 32)), HalfValBitSize) + // or + // (store (or (shl (zext HValue to i64), 32)), HalfValBitSize) + // (zext LValue to i64), + // Expect both operands of OR and the first operand of SHL to have only + // one use.
+ Value *LValue, *HValue; + if (!match(SI.getValueOperand(), + m_c_Or(m_OneUse(m_ZExt(m_Value(LValue))), + m_OneUse(m_Shl(m_OneUse(m_ZExt(m_Value(HValue))), + m_SpecificInt(HalfValBitSize)))))) + return false; + + // Check that LValue and HValue are integers with size no larger than + // HalfValBitSize. + if (!LValue->getType()->isIntegerTy() || + DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize || + !HValue->getType()->isIntegerTy() || + DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize) + return false; + + // If LValue/HValue is a bitcast instruction, use the EVT before bitcast + // as the input of target query. + auto *LBC = dyn_cast<BitCastInst>(LValue); + auto *HBC = dyn_cast<BitCastInst>(HValue); + EVT LowTy = LBC ? EVT::getEVT(LBC->getOperand(0)->getType()) + : EVT::getEVT(LValue->getType()); + EVT HighTy = HBC ? EVT::getEVT(HBC->getOperand(0)->getType()) + : EVT::getEVT(HValue->getType()); + if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy)) + return false; + + // Start to split store. + IRBuilder<> Builder(SI.getContext()); + Builder.SetInsertPoint(&SI); + + // If LValue/HValue is a bitcast in another BB, create a new one in current + // BB so it may be merged with the split stores by the DAG combiner. + if (LBC && LBC->getParent() != SI.getParent()) + LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType()); + if (HBC && HBC->getParent() != SI.getParent()) + HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType()); + + auto CreateSplitStore = [&](Value *V, bool Upper) { + V = Builder.CreateZExtOrBitCast(V, SplitStoreType); + Value *Addr = Builder.CreateBitCast( + SI.getOperand(1), + SplitStoreType->getPointerTo(SI.getPointerAddressSpace())); + if (Upper) + Addr = Builder.CreateGEP( + SplitStoreType, Addr, + ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1)); + Builder.CreateAlignedStore( + V, Addr, Upper ? SI.getAlignment() / 2 : SI.getAlignment()); + }; + + CreateSplitStore(LValue, false); + CreateSplitStore(HValue, true); + + // Delete the old store. + SI.eraseFromParent(); + return true; +} + bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) { // Bail out if we inserted the instruction to prevent optimizations from // stepping on each other's toes.
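splitMergedValStore only fires when the target's isMultiStoresCheaperThanBitsMerge hook (or -force-split-store) approves, and it rests on a simple layout fact: storing (zext Lo) | ((zext Hi) << 32) as one wide value writes the same bytes as storing Lo at the base address and Hi at base+4, modulo endianness. A small self-contained check of that equivalence for the little-endian case (the values and buffer names are arbitrary, chosen for the example):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  const uint32_t Lo = 0x11223344u, Hi = 0x55667788u;

  // Merged form: one 64-bit store of the or/shl-combined value.
  unsigned char Merged[8];
  const uint64_t Wide =
      static_cast<uint64_t>(Lo) | (static_cast<uint64_t>(Hi) << 32);
  std::memcpy(Merged, &Wide, sizeof(Wide));

  // Split form: Lo at offset 0 and Hi at offset 4, matching the two
  // CreateSplitStore() calls above.
  unsigned char Split[8];
  std::memcpy(Split, &Lo, sizeof(Lo));
  std::memcpy(Split + 4, &Hi, sizeof(Hi));

  // On a little-endian host the two byte images are identical; a big-endian
  // target would need the halves swapped.
  assert(std::memcmp(Merged, Split, sizeof(Merged)) == 0);
  return 0;
}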
@@ -5316,7 +5762,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) { return OptimizeCmpExpression(CI, TLI); if (LoadInst *LI = dyn_cast(I)) { - stripInvariantGroupMetadata(*LI); + LI->setMetadata(LLVMContext::MD_invariant_group, nullptr); if (TLI) { bool Modified = optimizeLoadExt(LI); unsigned AS = LI->getPointerAddressSpace(); @@ -5327,7 +5773,9 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) { } if (StoreInst *SI = dyn_cast(I)) { - stripInvariantGroupMetadata(*SI); + if (TLI && splitMergedValStore(*SI, *DL, *TLI)) + return true; + SI->setMetadata(LLVMContext::MD_invariant_group, nullptr); if (TLI) { unsigned AS = SI->getPointerAddressSpace(); return optimizeMemoryInst(I, SI->getOperand(1), @@ -5338,6 +5786,10 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) { BinaryOperator *BinOp = dyn_cast(I); + if (BinOp && (BinOp->getOpcode() == Instruction::And) && + EnableAndCmpSinking && TLI) + return sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts); + if (BinOp && (BinOp->getOpcode() == Instruction::AShr || BinOp->getOpcode() == Instruction::LShr)) { ConstantInt *CI = dyn_cast(BinOp->getOperand(1)); @@ -5467,68 +5919,6 @@ bool CodeGenPrepare::placeDbgValues(Function &F) { return MadeChange; } -// If there is a sequence that branches based on comparing a single bit -// against zero that can be combined into a single instruction, and the -// target supports folding these into a single instruction, sink the -// mask and compare into the branch uses. Do this before OptimizeBlock -> -// OptimizeInst -> OptimizeCmpExpression, which perturbs the pattern being -// searched for. -bool CodeGenPrepare::sinkAndCmp(Function &F) { - if (!EnableAndCmpSinking) - return false; - if (!TLI || !TLI->isMaskAndBranchFoldingLegal()) - return false; - bool MadeChange = false; - for (BasicBlock &BB : F) { - // Does this BB end with the following? - // %andVal = and %val, #single-bit-set - // %icmpVal = icmp %andResult, 0 - // br i1 %cmpVal label %dest1, label %dest2" - BranchInst *Brcc = dyn_cast(BB.getTerminator()); - if (!Brcc || !Brcc->isConditional()) - continue; - ICmpInst *Cmp = dyn_cast(Brcc->getOperand(0)); - if (!Cmp || Cmp->getParent() != &BB) - continue; - ConstantInt *Zero = dyn_cast(Cmp->getOperand(1)); - if (!Zero || !Zero->isZero()) - continue; - Instruction *And = dyn_cast(Cmp->getOperand(0)); - if (!And || And->getOpcode() != Instruction::And || And->getParent() != &BB) - continue; - ConstantInt* Mask = dyn_cast(And->getOperand(1)); - if (!Mask || !Mask->getUniqueInteger().isPowerOf2()) - continue; - DEBUG(dbgs() << "found and; icmp ?,0; brcc\n"); DEBUG(BB.dump()); - - // Push the "and; icmp" for any users that are conditional branches. - // Since there can only be one branch use per BB, we don't need to keep - // track of which BBs we insert into. - for (Use &TheUse : Cmp->uses()) { - // Find brcc use. - BranchInst *BrccUser = dyn_cast(TheUse); - if (!BrccUser || !BrccUser->isConditional()) - continue; - BasicBlock *UserBB = BrccUser->getParent(); - if (UserBB == &BB) continue; - DEBUG(dbgs() << "found Brcc use\n"); - - // Sink the "and; icmp" to use. 
- MadeChange = true; - BinaryOperator *NewAnd = - BinaryOperator::CreateAnd(And->getOperand(0), And->getOperand(1), "", - BrccUser); - CmpInst *NewCmp = - CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(), NewAnd, Zero, - "", BrccUser); - TheUse = NewCmp; - ++NumAndCmpsMoved; - DEBUG(BrccUser->getParent()->dump()); - } - } - return MadeChange; -} - /// \brief Scale down both weights to fit into uint32_t. static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) { uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse; @@ -5733,8 +6123,3 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) { } return MadeChange; } - -void CodeGenPrepare::stripInvariantGroupMetadata(Instruction &I) { - if (auto *InvariantMD = I.getMetadata(LLVMContext::MD_invariant_group)) - I.dropUnknownNonDebugMetadata(InvariantMD->getMetadataID()); -} diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 422f2dc..24bc15e 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -558,7 +558,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) { Edit->rematerializeAt(*MI.getParent(), MI, NewVReg, RM, TRI); // We take the DebugLoc from MI, since OrigMI may be attributed to a - // different source location. + // different source location. auto *NewMI = LIS.getInstructionFromIndex(DefIdx); NewMI->setDebugLoc(MI.getDebugLoc()); @@ -1124,7 +1124,7 @@ void HoistSpillHelper::rmRedundantSpills( // earlier spill with smaller SlotIndex. for (const auto CurrentSpill : Spills) { MachineBasicBlock *Block = CurrentSpill->getParent(); - MachineDomTreeNode *Node = MDT.DT->getNode(Block); + MachineDomTreeNode *Node = MDT.getBase().getNode(Block); MachineInstr *PrevSpill = SpillBBToSpill[Node]; if (PrevSpill) { SlotIndex PIdx = LIS.getInstructionIndex(*PrevSpill); @@ -1132,9 +1132,9 @@ void HoistSpillHelper::rmRedundantSpills( MachineInstr *SpillToRm = (CIdx > PIdx) ? CurrentSpill : PrevSpill; MachineInstr *SpillToKeep = (CIdx > PIdx) ? PrevSpill : CurrentSpill; SpillsToRm.push_back(SpillToRm); - SpillBBToSpill[MDT.DT->getNode(Block)] = SpillToKeep; + SpillBBToSpill[MDT.getBase().getNode(Block)] = SpillToKeep; } else { - SpillBBToSpill[MDT.DT->getNode(Block)] = CurrentSpill; + SpillBBToSpill[MDT.getBase().getNode(Block)] = CurrentSpill; } } for (const auto SpillToRm : SpillsToRm) @@ -1209,7 +1209,7 @@ void HoistSpillHelper::getVisitOrders( // Sort the nodes in WorkSet in top-down order and save the nodes // in Orders. Orders will be used for hoisting in runHoistSpills. 
unsigned idx = 0; - Orders.push_back(MDT.DT->getNode(Root)); + Orders.push_back(MDT.getBase().getNode(Root)); do { MachineDomTreeNode *Node = Orders[idx++]; const std::vector &Children = Node->getChildren(); diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index ad8140b..b48c839 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/ModuleSlotTracker.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -148,8 +149,11 @@ MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() { MachineBasicBlock::iterator MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) { + const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo(); + iterator E = end(); - while (I != E && (I->isPHI() || I->isPosition())) + while (I != E && (I->isPHI() || I->isPosition() || + TII->isBasicBlockPrologue(*I))) ++I; // FIXME: This needs to change if we wish to bundle labels // inside the bundle. @@ -160,8 +164,11 @@ MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) { MachineBasicBlock::iterator MachineBasicBlock::SkipPHIsLabelsAndDebug(MachineBasicBlock::iterator I) { + const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo(); + iterator E = end(); - while (I != E && (I->isPHI() || I->isPosition() || I->isDebugValue())) + while (I != E && (I->isPHI() || I->isPosition() || I->isDebugValue() || + TII->isBasicBlockPrologue(*I))) ++I; // FIXME: This needs to change if we wish to bundle labels / dbg_values // inside the bundle. @@ -191,10 +198,7 @@ MachineBasicBlock::instr_iterator MachineBasicBlock::getFirstInstrTerminator() { MachineBasicBlock::iterator MachineBasicBlock::getFirstNonDebugInstr() { // Skip over begin-of-block dbg_value instructions. - iterator I = begin(), E = end(); - while (I != E && I->isDebugValue()) - ++I; - return I; + return skipDebugInstructionsForward(begin(), end()); } MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() { @@ -229,7 +233,7 @@ StringRef MachineBasicBlock::getName() const { if (const BasicBlock *LBB = getBasicBlock()) return LBB->getName(); else - return "(null)"; + return StringRef("", 0); } /// Return a hopefully unique identifier for this block. 
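The SkipPHIsAndLabels/SkipPHIsLabelsAndDebug changes above make the insertion-point search consult TargetInstrInfo::isBasicBlockPrologue, whose default returns false. A minimal sketch of what a target-side override might look like, assuming a hypothetical prologue-like opcode (the class and opcode are placeholders, not part of this patch):

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Target/TargetInstrInfo.h"

namespace llvm {

// Stand-in for a generated target opcode (e.g. an exec-mask or frame setup
// instruction that must stay at the top of every block).
enum { EXAMPLE_BLOCK_SETUP = 1 };

class ExampleInstrInfo : public TargetInstrInfo {
public:
  // Instructions matching this predicate are treated like PHIs and labels:
  // SkipPHIsAndLabels() and SkipPHIsLabelsAndDebug() step over them, so new
  // code lands after the block's setup sequence.
  bool isBasicBlockPrologue(const MachineInstr &MI) const override {
    return MI.getOpcode() == EXAMPLE_BLOCK_SETUP;
  }
};

} // end namespace llvm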
@@ -290,9 +294,9 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, if (!livein_empty()) { if (Indexes) OS << '\t'; OS << " Live Ins:"; - for (const auto &LI : make_range(livein_begin(), livein_end())) { + for (const auto &LI : LiveIns) { OS << ' ' << PrintReg(LI.PhysReg, TRI); - if (LI.LaneMask != ~0u) + if (!LI.LaneMask.all()) OS << ':' << PrintLaneMask(LI.LaneMask); } OS << '\n'; @@ -343,14 +347,14 @@ void MachineBasicBlock::removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) { return; I->LaneMask &= ~LaneMask; - if (I->LaneMask == 0) + if (I->LaneMask.none()) LiveIns.erase(I); } bool MachineBasicBlock::isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) const { livein_iterator I = find_if( LiveIns, [Reg](const RegisterMaskPair &LI) { return LI.PhysReg == Reg; }); - return I != livein_end() && (I->LaneMask & LaneMask) != 0; + return I != livein_end() && (I->LaneMask & LaneMask).any(); } void MachineBasicBlock::sortUniqueLiveIns() { @@ -421,7 +425,7 @@ void MachineBasicBlock::updateTerminator() { MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector Cond; - DebugLoc DL; // FIXME: this is nowhere + DebugLoc DL = findBranchDebugLoc(); bool B = TII->analyzeBranch(*this, TBB, FBB, Cond); (void) B; assert(!B && "UpdateTerminators requires analyzable predecessors!"); @@ -489,7 +493,7 @@ void MachineBasicBlock::updateTerminator() { // FIXME: This does not seem like a reasonable pattern to support, but it // has been seen in the wild coming out of degenerate ARM test cases. TII->removeBranch(*this); - + // Finally update the unconditional successor to be reached via a branch if // it would not be reached by fallthrough. if (!isLayoutSuccessor(TBB)) @@ -1141,16 +1145,28 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, /// instructions. Return UnknownLoc if there is none. DebugLoc MachineBasicBlock::findDebugLoc(instr_iterator MBBI) { - DebugLoc DL; - instr_iterator E = instr_end(); - if (MBBI == E) - return DL; - // Skip debug declarations, we don't want a DebugLoc from them. - while (MBBI != E && MBBI->isDebugValue()) - MBBI++; - if (MBBI != E) - DL = MBBI->getDebugLoc(); + MBBI = skipDebugInstructionsForward(MBBI, instr_end()); + if (MBBI != instr_end()) + return MBBI->getDebugLoc(); + return {}; +} + +/// Find and return the merged DebugLoc of the branch instructions of the block. +/// Return UnknownLoc if there is none. +DebugLoc +MachineBasicBlock::findBranchDebugLoc() { + DebugLoc DL; + auto TI = getFirstTerminator(); + while (TI != end() && !TI->isBranch()) + ++TI; + + if (TI != end()) { + DL = TI->getDebugLoc(); + for (++TI ; TI != end() ; ++TI) + if (TI->isBranch()) + DL = DILocation::getMergedLocation(DL, TI->getDebugLoc()); + } return DL; } @@ -1298,3 +1314,14 @@ MachineBasicBlock::getEndClobberMask(const TargetRegisterInfo *TRI) const { // care what kind of return it is, putting a mask after it is a no-op. return isReturnBlock() && !succ_empty() ? 
TRI->getNoPreservedMask() : nullptr; } + +void MachineBasicBlock::clearLiveIns() { + LiveIns.clear(); +} + +MachineBasicBlock::livein_iterator MachineBasicBlock::livein_begin() const { + assert(getParent()->getProperties().hasProperty( + MachineFunctionProperties::Property::TracksLiveness) && + "Liveness information is accurate"); + return LiveIns.begin(); +} diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 452433d..e5e2bb2 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -169,6 +169,7 @@ void MachineFunction::clear() { InstructionRecycler.clear(Allocator); OperandRecycler.clear(Allocator); BasicBlockRecycler.clear(Allocator); + VariableDbgInfos.clear(); if (RegInfo) { RegInfo->~MachineRegisterInfo(); Allocator.Deallocate(RegInfo); @@ -956,7 +957,7 @@ void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void MachineFrameInfo::dump(const MachineFunction &MF) const { +LLVM_DUMP_METHOD void MachineFrameInfo::dump(const MachineFunction &MF) const { print(MF, dbgs()); } #endif diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index b50347d..535e0de 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" @@ -25,6 +24,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -232,9 +232,12 @@ namespace { SDValue visitTokenFactor(SDNode *N); SDValue visitMERGE_VALUES(SDNode *N); SDValue visitADD(SDNode *N); + SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference); SDValue visitSUB(SDNode *N); SDValue visitADDC(SDNode *N); + SDValue visitUADDO(SDNode *N); SDValue visitSUBC(SDNode *N); + SDValue visitUSUBO(SDNode *N); SDValue visitADDE(SDNode *N); SDValue visitSUBE(SDNode *N); SDValue visitMUL(SDNode *N); @@ -274,6 +277,7 @@ namespace { SDValue visitSIGN_EXTEND(SDNode *N); SDValue visitZERO_EXTEND(SDNode *N); SDValue visitANY_EXTEND(SDNode *N); + SDValue visitAssertZext(SDNode *N); SDValue visitSIGN_EXTEND_INREG(SDNode *N); SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N); SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N); @@ -336,6 +340,7 @@ namespace { SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt); SDValue foldSelectOfConstants(SDNode *N); + SDValue foldBinOpIntoSelect(SDNode *BO); bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2); @@ -385,9 +390,9 @@ namespace { SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N); SDValue reduceBuildVecToShuffle(SDNode *N); - SDValue createBuildVecShuffle(SDLoc DL, SDNode *N, ArrayRef VectorMask, - SDValue VecIn1, SDValue VecIn2, - unsigned LeftIdx); + SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N, + ArrayRef VectorMask, SDValue VecIn1, + SDValue VecIn2, unsigned LeftIdx); SDValue GetDemandedBits(SDValue V, const APInt 
&Mask); @@ -608,10 +613,16 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations, switch (Op.getOpcode()) { default: return false; - case ISD::ConstantFP: - // Don't invert constant FP values after legalize. The negated constant - // isn't necessarily legal. - return LegalOperations ? 0 : 1; + case ISD::ConstantFP: { + if (!LegalOperations) + return 1; + + // Don't invert constant FP values after legalization unless the target says + // the negated constant is legal. + EVT VT = Op.getValueType(); + return TLI.isOperationLegal(ISD::ConstantFP, VT) || + TLI.isFPImmLegal(neg(cast(Op)->getValueAPF()), VT); + } case ISD::FADD: // FIXME: determine better conditions for this xform. if (!Options->UnsafeFPMath) return 0; @@ -630,7 +641,8 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations, Depth + 1); case ISD::FSUB: // We can't turn -(A-B) into B-A when we honor signed zeros. - if (!Options->UnsafeFPMath && !Op.getNode()->getFlags()->hasNoSignedZeros()) + if (!Options->NoSignedZerosFPMath && + !Op.getNode()->getFlags()->hasNoSignedZeros()) return 0; // fold (fneg (fsub A, B)) -> (fsub B, A) @@ -1362,8 +1374,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { else { assert(N->getValueType(0) == RV.getValueType() && N->getNumValues() == 1 && "Type mismatch"); - SDValue OpV = RV; - DAG.ReplaceAllUsesWith(N, &OpV); + DAG.ReplaceAllUsesWith(N, &RV); } // Push the new node and any users onto the worklist @@ -1390,7 +1401,9 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::ADD: return visitADD(N); case ISD::SUB: return visitSUB(N); case ISD::ADDC: return visitADDC(N); + case ISD::UADDO: return visitUADDO(N); case ISD::SUBC: return visitSUBC(N); + case ISD::USUBO: return visitUSUBO(N); case ISD::ADDE: return visitADDE(N); case ISD::SUBE: return visitSUBE(N); case ISD::MUL: return visitMUL(N); @@ -1431,6 +1444,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); case ISD::ANY_EXTEND: return visitANY_EXTEND(N); + case ISD::AssertZext: return visitAssertZext(N); case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N); case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N); @@ -1665,6 +1679,60 @@ static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) { return Const != nullptr && !Const->isOpaque() ? Const : nullptr; } +SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { + auto BinOpcode = BO->getOpcode(); + assert((BinOpcode == ISD::ADD || BinOpcode == ISD::SUB || + BinOpcode == ISD::MUL || BinOpcode == ISD::SDIV || + BinOpcode == ISD::UDIV || BinOpcode == ISD::SREM || + BinOpcode == ISD::UREM || BinOpcode == ISD::AND || + BinOpcode == ISD::OR || BinOpcode == ISD::XOR || + BinOpcode == ISD::SHL || BinOpcode == ISD::SRL || + BinOpcode == ISD::SRA || BinOpcode == ISD::FADD || + BinOpcode == ISD::FSUB || BinOpcode == ISD::FMUL || + BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) && + "Unexpected binary operator"); + + // Bail out if any constants are opaque because we can't constant fold those. + SDValue C1 = BO->getOperand(1); + if (!isConstantOrConstantVector(C1, true) && + !isConstantFPBuildVectorOrConstantFP(C1)) + return SDValue(); + + // Don't do this unless the old select is going away. We want to eliminate the + // binary operator, not replace a binop with a select. + // TODO: Handle ISD::SELECT_CC. 
+ SDValue Sel = BO->getOperand(0); + if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) + return SDValue(); + + SDValue CT = Sel.getOperand(1); + if (!isConstantOrConstantVector(CT, true) && + !isConstantFPBuildVectorOrConstantFP(CT)) + return SDValue(); + + SDValue CF = Sel.getOperand(2); + if (!isConstantOrConstantVector(CF, true) && + !isConstantFPBuildVectorOrConstantFP(CF)) + return SDValue(); + + // We have a select-of-constants followed by a binary operator with a + // constant. Eliminate the binop by pulling the constant math into the select. + // Example: add (select Cond, CT, CF), C1 --> select Cond, CT + C1, CF + C1 + EVT VT = Sel.getValueType(); + SDLoc DL(Sel); + SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1); + assert((NewCT.isUndef() || isConstantOrConstantVector(NewCT) || + isConstantFPBuildVectorOrConstantFP(NewCT)) && + "Failed to constant fold a binop with constant operands"); + + SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1); + assert((NewCF.isUndef() || isConstantOrConstantVector(NewCF) || + isConstantFPBuildVectorOrConstantFP(NewCF)) && + "Failed to constant fold a binop with constant operands"); + + return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF); +} + SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -1713,6 +1781,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) { } } + if (SDValue NewSel = foldBinOpIntoSelect(N)) + return NewSel; + // reassociate add if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1)) return RADD; @@ -1775,6 +1846,19 @@ SDValue DAGCombiner::visitADD(SDNode *N) { VT.isInteger() && DAG.haveNoCommonBitsSet(N0, N1)) return DAG.getNode(ISD::OR, DL, VT, N0, N1); + if (SDValue Combined = visitADDLike(N0, N1, N)) + return Combined; + + if (SDValue Combined = visitADDLike(N1, N0, N)) + return Combined; + + return SDValue(); +} + +SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) { + EVT VT = N0.getValueType(); + SDLoc DL(LocReference); + // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB && isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0))) @@ -1782,12 +1866,6 @@ SDValue DAGCombiner::visitADD(SDNode *N) { DAG.getNode(ISD::SHL, DL, VT, N1.getOperand(0).getOperand(1), N1.getOperand(1))); - if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB && - isNullConstantOrNullSplatConstant(N0.getOperand(0).getOperand(0))) - return DAG.getNode(ISD::SUB, DL, VT, N1, - DAG.getNode(ISD::SHL, DL, VT, - N0.getOperand(0).getOperand(1), - N0.getOperand(1))); if (N1.getOpcode() == ISD::AND) { SDValue AndOp0 = N1.getOperand(0); @@ -1798,7 +1876,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { // and similar xforms where the inner op is either ~0 or 0. if (NumSignBits == DestBits && isOneConstantOrOneSplatConstant(N1->getOperand(1))) - return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0); + return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0); } // add (sext i1), X -> sub X, (zext i1) @@ -1826,39 +1904,61 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); + SDLoc DL(N); // If the flag result is dead, turn this into an ADD. 
if (!N->hasAnyUseOfValue(1)) - return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1), - DAG.getNode(ISD::CARRY_FALSE, - SDLoc(N), MVT::Glue)); + return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1), + DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); // canonicalize constant to RHS. ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); if (N0C && !N1C) - return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0); + return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0); // fold (addc x, 0) -> x + no carry out if (isNullConstant(N1)) return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, - SDLoc(N), MVT::Glue)); + DL, MVT::Glue)); + + // If it cannot overflow, transform into an add. + if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never) + return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1), + DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); + + return SDValue(); +} - // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits. - APInt LHSZero, LHSOne; - APInt RHSZero, RHSOne; - DAG.computeKnownBits(N0, LHSZero, LHSOne); +SDValue DAGCombiner::visitUADDO(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); + if (VT.isVector()) + return SDValue(); - if (LHSZero.getBoolValue()) { - DAG.computeKnownBits(N1, RHSZero, RHSOne); + EVT CarryVT = N->getValueType(1); + SDLoc DL(N); - // If all possibly-set bits on the LHS are clear on the RHS, return an OR. - // If all possibly-set bits on the RHS are clear on the LHS, return an OR. - if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) - return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1), - DAG.getNode(ISD::CARRY_FALSE, - SDLoc(N), MVT::Glue)); - } + // If the flag result is dead, turn this into an ADD. + if (!N->hasAnyUseOfValue(1)) + return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1), + DAG.getUNDEF(CarryVT)); + + // canonicalize constant to RHS. + ConstantSDNode *N0C = dyn_cast(N0); + ConstantSDNode *N1C = dyn_cast(N1); + if (N0C && !N1C) + return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0); + + // fold (uaddo x, 0) -> x + no carry out + if (isNullConstant(N1)) + return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT)); + + // If it cannot overflow, transform into an add. + if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never) + return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1), + DAG.getConstant(0, DL, CarryVT)); return SDValue(); } @@ -1921,6 +2021,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { N1.getNode()); } + if (SDValue NewSel = foldBinOpIntoSelect(N)) + return NewSel; + ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); // fold (sub x, c) -> (add x, -c) @@ -2067,6 +2170,38 @@ SDValue DAGCombiner::visitSUBC(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitUSUBO(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); + if (VT.isVector()) + return SDValue(); + + EVT CarryVT = N->getValueType(1); + SDLoc DL(N); + + // If the flag result is dead, turn this into an SUB. + if (!N->hasAnyUseOfValue(1)) + return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1), + DAG.getUNDEF(CarryVT)); + + // fold (usubo x, x) -> 0 + no borrow + if (N0 == N1) + return CombineTo(N, DAG.getConstant(0, DL, VT), + DAG.getConstant(0, DL, CarryVT)); + + // fold (usubo x, 0) -> x + no borrow + if (isNullConstant(N1)) + return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT)); + + // Canonicalize (usubo -1, x) -> ~x, i.e. 
(xor x, -1) + no borrow + if (isAllOnesConstant(N0)) + return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0), + DAG.getConstant(0, DL, CarryVT)); + + return SDValue(); +} + SDValue DAGCombiner::visitSUBE(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -2132,6 +2267,10 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // fold (mul x, 1) -> x if (N1IsConst && ConstValue1 == 1 && IsFullSplat) return N0; + + if (SDValue NewSel = foldBinOpIntoSelect(N)) + return NewSel; + // fold (mul x, -1) -> 0-x if (N1IsConst && ConstValue1.isAllOnesValue()) { SDLoc DL(N); @@ -2298,6 +2437,31 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) { return combined; } +static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + SDLoc DL(N); + + // X / undef -> undef + // X % undef -> undef + if (N1.isUndef()) + return N1; + + // X / 0 --> undef + // X % 0 --> undef + // We don't need to preserve faults! + if (isNullConstantOrNullSplatConstant(N1)) + return DAG.getUNDEF(VT); + + // undef / X -> 0 + // undef % X -> 0 + if (N0.isUndef()) + return DAG.getConstant(0, DL, VT); + + return SDValue(); +} + SDValue DAGCombiner::visitSDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -2320,8 +2484,13 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { return N0; // fold (sdiv X, -1) -> 0-X if (N1C && N1C->isAllOnesValue()) - return DAG.getNode(ISD::SUB, DL, VT, - DAG.getConstant(0, DL, VT), N0); + return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0); + + if (SDValue V = simplifyDivRem(N, DAG)) + return V; + + if (SDValue NewSel = foldBinOpIntoSelect(N)) + return NewSel; // If we know the sign bits of both operands are zero, strength reduce to a // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 @@ -2385,13 +2554,6 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { if (SDValue DivRem = useDivRem(N)) return DivRem; - // undef / X -> 0 - if (N0.isUndef()) - return DAG.getConstant(0, DL, VT); - // X / undef -> undef - if (N1.isUndef()) - return N1; - return SDValue(); } @@ -2415,6 +2577,12 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { N0C, N1C)) return Folded; + if (SDValue V = simplifyDivRem(N, DAG)) + return V; + + if (SDValue NewSel = foldBinOpIntoSelect(N)) + return NewSel; + // fold (udiv x, (1 << c)) -> x >>u c if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) && DAG.isKnownToBeAPowerOfTwo(N1)) { @@ -2457,13 +2625,6 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { if (SDValue DivRem = useDivRem(N)) return DivRem; - // undef / X -> 0 - if (N0.isUndef()) - return DAG.getConstant(0, DL, VT); - // X / undef -> undef - if (N1.isUndef()) - return N1; - return SDValue(); } @@ -2483,26 +2644,29 @@ SDValue DAGCombiner::visitREM(SDNode *N) { if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C)) return Folded; + if (SDValue V = simplifyDivRem(N, DAG)) + return V; + + if (SDValue NewSel = foldBinOpIntoSelect(N)) + return NewSel; + if (isSigned) { // If we know the sign bits of both operands are zero, strength reduce to a // urem instead. 
Handles (X & 0x0FFFFFFF) %s 16 -> X&15 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::UREM, DL, VT, N0, N1); } else { - // fold (urem x, pow2) -> (and x, pow2-1) + SDValue NegOne = DAG.getAllOnesConstant(DL, VT); if (DAG.isKnownToBeAPowerOfTwo(N1)) { - APInt NegOne = APInt::getAllOnesValue(VT.getScalarSizeInBits()); - SDValue Add = - DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT)); + // fold (urem x, pow2) -> (and x, pow2-1) + SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne); AddToWorklist(Add.getNode()); return DAG.getNode(ISD::AND, DL, VT, N0, Add); } - // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) if (N1.getOpcode() == ISD::SHL && DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) { - APInt NegOne = APInt::getAllOnesValue(VT.getScalarSizeInBits()); - SDValue Add = - DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT)); + // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) + SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne); AddToWorklist(Add.getNode()); return DAG.getNode(ISD::AND, DL, VT, N0, Add); } @@ -2537,13 +2701,6 @@ SDValue DAGCombiner::visitREM(SDNode *N) { if (SDValue DivRem = useDivRem(N)) return DivRem.getValue(1); - // undef % X -> 0 - if (N0.isUndef()) - return DAG.getConstant(0, DL, VT); - // X % undef -> undef - if (N1.isUndef()) - return N1; - return SDValue(); } @@ -3191,6 +3348,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnesValue(BitWidth))) return DAG.getConstant(0, SDLoc(N), VT); + + if (SDValue NewSel = foldBinOpIntoSelect(N)) + return NewSel; + // reassociate and if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1)) return RAND; @@ -3726,12 +3887,9 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) { EVT VT = N1.getValueType(); // fold (or x, undef) -> -1 - if (!LegalOperations && - (N0.isUndef() || N1.isUndef())) { - EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; - return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), - SDLoc(LocReference), VT); - } + if (!LegalOperations && (N0.isUndef() || N1.isUndef())) + return DAG.getAllOnesConstant(SDLoc(LocReference), VT); + // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) SDValue LL, LR, RL, RR, CC0, CC1; if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ @@ -3847,14 +4005,10 @@ SDValue DAGCombiner::visitOR(SDNode *N) { // fold (or x, -1) -> -1, vector edition if (ISD::isBuildVectorAllOnes(N0.getNode())) // do not return N0, because undef node may exist in N0 - return DAG.getConstant( - APInt::getAllOnesValue(N0.getScalarValueSizeInBits()), SDLoc(N), - N0.getValueType()); + return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType()); if (ISD::isBuildVectorAllOnes(N1.getNode())) // do not return N1, because undef node may exist in N1 - return DAG.getConstant( - APInt::getAllOnesValue(N1.getScalarValueSizeInBits()), SDLoc(N), - N1.getValueType()); + return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType()); // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask) // Do this only if the resulting shuffle is legal. 
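For illustration, the unsigned-remainder folds in visitREM above rest on the
identity x % 2^k == x & (2^k - 1). A minimal standalone sketch of that
identity (hypothetical helper, not part of this patch):

    #include <cassert>
    #include <cstdint>

    // Mirrors the combine (urem x, pow2) -> (and x, pow2 - 1).
    static uint64_t URemByPow2(uint64_t X, uint64_t Pow2) {
      // The caller guarantees a power of two, as DAG.isKnownToBeAPowerOfTwo
      // does for the combine.
      assert(Pow2 != 0 && (Pow2 & (Pow2 - 1)) == 0);
      return X & (Pow2 - 1);
    }

The (shl pow2, y) variant folds the same way: a left-shifted power of two is
still a power of two whenever the shift does not overflow.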
@@ -3939,6 +4093,10 @@ SDValue DAGCombiner::visitOR(SDNode *N) { // fold (or x, -1) -> -1 if (isAllOnesConstant(N1)) return N1; + + if (SDValue NewSel = foldBinOpIntoSelect(N)) + return NewSel; + // fold (or x, c) -> c iff (x & ~c) == 0 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue())) return N1; @@ -4193,8 +4351,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { // If there is an AND of either shifted operand, apply it to the result. if (LHSMask.getNode() || RHSMask.getNode()) { - APInt AllBits = APInt::getAllOnesValue(EltSizeInBits); - SDValue Mask = DAG.getConstant(AllBits, DL, VT); + SDValue Mask = DAG.getAllOnesConstant(DL, VT); if (LHSMask.getNode()) { APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal); @@ -4280,7 +4437,8 @@ struct BaseIndexOffset { } /// Parses tree in Ptr for base, index, offset addresses. - static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG) { + static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG, + int64_t PartialOffset = 0) { bool IsIndexSignExt = false; // Split up a folded GlobalAddress+Offset into its component parts. @@ -4289,7 +4447,7 @@ struct BaseIndexOffset { return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(), SDLoc(GA), GA->getValueType(0), - /*Offset=*/0, + /*Offset=*/PartialOffset, /*isTargetGA=*/false, GA->getTargetFlags()), SDValue(), @@ -4301,14 +4459,13 @@ struct BaseIndexOffset { // instruction, then it could be just the BASE or everything else we don't // know how to handle. Just use Ptr as BASE and give up. if (Ptr->getOpcode() != ISD::ADD) - return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); + return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt); // We know that we have at least an ADD instruction. Try to pattern match // the simple case of BASE + OFFSET. if (isa(Ptr->getOperand(1))) { int64_t Offset = cast(Ptr->getOperand(1))->getSExtValue(); - return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset, - IsIndexSignExt); + return match(Ptr->getOperand(0), DAG, Offset + PartialOffset); } // Inside a loop the current BASE pointer is calculated using an ADD and a @@ -4317,7 +4474,7 @@ struct BaseIndexOffset { // (i64 mul (i64 %induction_var) // (i64 %element_size))) if (Ptr->getOperand(1)->getOpcode() == ISD::MUL) - return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); + return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt); // Look at Base + Index + Offset cases. SDValue Base = Ptr->getOperand(0); @@ -4331,14 +4488,14 @@ struct BaseIndexOffset { // Either the case of Base + Index (no offset) or something else. if (IndexOffset->getOpcode() != ISD::ADD) - return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt); + return BaseIndexOffset(Base, IndexOffset, PartialOffset, IsIndexSignExt); // Now we have the case of Base + Index + offset. SDValue Index = IndexOffset->getOperand(0); SDValue Offset = IndexOffset->getOperand(1); if (!isa(Offset)) - return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); + return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt); // Ignore signextends. if (Index->getOpcode() == ISD::SIGN_EXTEND) { @@ -4347,92 +4504,84 @@ struct BaseIndexOffset { } else IsIndexSignExt = false; int64_t Off = cast(Offset)->getSExtValue(); - return BaseIndexOffset(Base, Index, Off, IsIndexSignExt); + return BaseIndexOffset(Base, Index, Off + PartialOffset, IsIndexSignExt); } }; } // namespace namespace { -/// Represents the origin of an individual byte in load combine pattern. 
The -/// value of the byte is either unknown, zero or comes from memory. +/// Represents known origin of an individual byte in load combine pattern. The +/// value of the byte is either constant zero or comes from memory. struct ByteProvider { - enum ProviderTy { - Unknown, - ZeroConstant, - Memory - }; - - ProviderTy Kind; - // Load and ByteOffset are set for Memory providers only. + // For constant zero providers Load is set to nullptr. For memory providers // Load represents the node which loads the byte from memory. // ByteOffset is the offset of the byte in the value produced by the load. LoadSDNode *Load; unsigned ByteOffset; - ByteProvider() : Kind(ProviderTy::Unknown), Load(nullptr), ByteOffset(0) {} + ByteProvider() : Load(nullptr), ByteOffset(0) {} - static ByteProvider getUnknown() { - return ByteProvider(ProviderTy::Unknown, nullptr, 0); - } static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) { - return ByteProvider(ProviderTy::Memory, Load, ByteOffset); - } - static ByteProvider getZero() { - return ByteProvider(ProviderTy::ZeroConstant, nullptr, 0); + return ByteProvider(Load, ByteOffset); } + static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); } + + bool isConstantZero() const { return !Load; } + bool isMemory() const { return Load; } bool operator==(const ByteProvider &Other) const { - return Other.Kind == Kind && Other.Load == Load && - Other.ByteOffset == ByteOffset; + return Other.Load == Load && Other.ByteOffset == ByteOffset; } private: - ByteProvider(ProviderTy Kind, LoadSDNode *Load, unsigned ByteOffset) - : Kind(Kind), Load(Load), ByteOffset(ByteOffset) {} + ByteProvider(LoadSDNode *Load, unsigned ByteOffset) + : Load(Load), ByteOffset(ByteOffset) {} }; -/// Recursively traverses the expression collecting the origin of individual -/// bytes of the given value. For all the values except the root of the -/// expression verifies that it doesn't have uses outside of the expression. -const Optional > -collectByteProviders(SDValue Op, bool CheckNumberOfUses = false) { - if (CheckNumberOfUses && !Op.hasOneUse()) +/// Recursively traverses the expression calculating the origin of the requested +/// byte of the given value. Returns None if the provider can't be calculated. +/// +/// For all the values except the root of the expression verifies that the value +/// has exactly one use and if it's not true return None. This way if the origin +/// of the byte is returned it's guaranteed that the values which contribute to +/// the byte are not used outside of this expression. +/// +/// Because the parts of the expression are not allowed to have more than one +/// use this function iterates over trees, not DAGs. So it never visits the same +/// node more than once. 
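+/// For illustration, in the little-endian i32 pattern
+///   (zext(load i8, p) | (zext(load i8, p+1) << 8))
+/// byte 0 is provided by the load at p, byte 1 by the load at p+1, and bytes
+/// 2 and 3 are the constant zero contributed by the zero-extends.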
+const Optional calculateByteProvider(SDValue Op, unsigned Index, + unsigned Depth, + bool Root = false) { + // Typical i64 by i8 pattern requires recursion up to 8 calls depth + if (Depth == 10) + return None; + + if (!Root && !Op.hasOneUse()) return None; - unsigned BitWidth = Op.getScalarValueSizeInBits(); + assert(Op.getValueType().isScalarInteger() && "can't handle other types"); + unsigned BitWidth = Op.getValueSizeInBits(); if (BitWidth % 8 != 0) return None; unsigned ByteWidth = BitWidth / 8; + assert(Index < ByteWidth && "invalid index requested"); + (void) ByteWidth; switch (Op.getOpcode()) { case ISD::OR: { - auto LHS = collectByteProviders(Op->getOperand(0), - /*CheckNumberOfUses=*/true); - auto RHS = collectByteProviders(Op->getOperand(1), - /*CheckNumberOfUses=*/true); - if (!LHS || !RHS) + auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1); + if (!LHS) + return None; + auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1); + if (!RHS) return None; - auto OR = [](ByteProvider LHS, ByteProvider RHS) { - if (LHS == RHS) - return LHS; - if (LHS.Kind == ByteProvider::Unknown || - RHS.Kind == ByteProvider::Unknown) - return ByteProvider::getUnknown(); - if (LHS.Kind == ByteProvider::Memory && RHS.Kind == ByteProvider::Memory) - return ByteProvider::getUnknown(); - - if (LHS.Kind == ByteProvider::Memory) - return LHS; - else - return RHS; - }; - - SmallVector Result(ByteWidth); - for (unsigned i = 0; i < LHS->size(); i++) - Result[i] = OR(LHS.getValue()[i], RHS.getValue()[i]); - - return Result; + if (LHS->isConstantZero()) + return RHS; + else if (RHS->isConstantZero()) + return LHS; + else + return None; } case ISD::SHL: { auto ShiftOp = dyn_cast(Op->getOperand(1)); @@ -4444,48 +4593,46 @@ collectByteProviders(SDValue Op, bool CheckNumberOfUses = false) { return None; uint64_t ByteShift = BitShift / 8; - auto Original = collectByteProviders(Op->getOperand(0), - /*CheckNumberOfUses=*/true); - if (!Original) - return None; - - SmallVector Result; - Result.insert(Result.begin(), ByteShift, ByteProvider::getZero()); - Result.insert(Result.end(), Original->begin(), - std::prev(Original->end(), ByteShift)); - assert(Result.size() == ByteWidth && - "Computed width doesn't match from type width"); - return Result; + return Index < ByteShift + ? ByteProvider::getConstantZero() + : calculateByteProvider(Op->getOperand(0), Index - ByteShift, + Depth + 1); } + case ISD::ANY_EXTEND: + case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: { - auto Original = collectByteProviders(Op->getOperand(0), - /*CheckNumberOfUses=*/true); - if (!Original) + SDValue NarrowOp = Op->getOperand(0); + unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits(); + if (NarrowBitWidth % 8 != 0) return None; + uint64_t NarrowByteWidth = NarrowBitWidth / 8; - SmallVector Result; - unsigned NarrowByteWidth = Original->size(); - Result.insert(Result.begin(), Original->begin(), Original->end()); - Result.insert(Result.end(), ByteWidth - NarrowByteWidth, - ByteProvider::getZero()); - assert(Result.size() == ByteWidth && - "Computed width doesn't match from type width"); - return Result; + if (Index >= NarrowByteWidth) + return Op.getOpcode() == ISD::ZERO_EXTEND + ? 
Optional(ByteProvider::getConstantZero()) + : None; + else + return calculateByteProvider(NarrowOp, Index, Depth + 1); } + case ISD::BSWAP: + return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1, + Depth + 1); case ISD::LOAD: { auto L = cast(Op.getNode()); - if (L->isVolatile() || L->isIndexed() || - L->getExtensionType() != ISD::NON_EXTLOAD) + if (L->isVolatile() || L->isIndexed()) return None; - assert(BitWidth == L->getMemoryVT().getSizeInBits() && - "For non-extend loads widths must be the same"); - - SmallVector Result(ByteWidth); - for (unsigned i = 0; i < ByteWidth; i++) - Result[i] = ByteProvider::getMemory(L, i); + unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits(); + if (NarrowBitWidth % 8 != 0) + return None; + uint64_t NarrowByteWidth = NarrowBitWidth / 8; - return Result; + if (Index >= NarrowByteWidth) + return L->getExtensionType() == ISD::ZEXTLOAD + ? Optional(ByteProvider::getConstantZero()) + : None; + else + return ByteProvider::getMemory(L, Index); } } @@ -4507,6 +4654,22 @@ collectByteProviders(SDValue Op, bool CheckNumberOfUses = false) { /// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3] /// => /// i32 val = BSWAP(*((i32)a)) +/// +/// TODO: This rule matches complex patterns with OR node roots and doesn't +/// interact well with the worklist mechanism. When a part of the pattern is +/// updated (e.g. one of the loads) its direct users are put into the worklist, +/// but the root node of the pattern which triggers the load combine is not +/// necessarily a direct user of the changed node. For example, once the address +/// of t28 load is reassociated load combine won't be triggered: +/// t25: i32 = add t4, Constant:i32<2> +/// t26: i64 = sign_extend t25 +/// t27: i64 = add t2, t26 +/// t28: i8,ch = load t0, t27, undef:i64 +/// t29: i32 = zero_extend t28 +/// t32: i32 = shl t29, Constant:i8<8> +/// t33: i32 = or t23, t32 +/// As a possible fix visitLoad can check if the load can be a part of a load +/// combine pattern and add corresponding OR roots to the worklist. SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { assert(N->getOpcode() == ISD::OR && "Can only match load combining against OR nodes"); @@ -4515,97 +4678,112 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { EVT VT = N->getValueType(0); if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64) return SDValue(); + unsigned ByteWidth = VT.getSizeInBits() / 8; - // There is nothing to do here if the target can't load a value of this type const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (!TLI.isOperationLegal(ISD::LOAD, VT)) + // Before legalize we can introduce too wide illegal loads which will be later + // split into legal sized loads. This enables us to combine i64 load by i8 + // patterns to a couple of i32 loads on 32 bit targets. 
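+  // For illustration, the source-level shape being matched is e.g.
+  //   i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
+  // (per the function comment above), which becomes a single i32 load, plus
+  // a BSWAP when the memory order and the target endianness disagree.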
+ if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT)) return SDValue(); - // Calculate byte providers for the OR we are looking at - auto Res = collectByteProviders(SDValue(N, 0)); - if (!Res) - return SDValue(); - auto &Bytes = Res.getValue(); - unsigned ByteWidth = Bytes.size(); - assert(VT.getSizeInBits() == ByteWidth * 8 && - "collectByteProviders computed width differs from type width"); + std::function LittleEndianByteAt = []( + unsigned BW, unsigned i) { return i; }; + std::function BigEndianByteAt = []( + unsigned BW, unsigned i) { return BW - i - 1; }; - auto LittleEndianByteAt = [](unsigned BW, unsigned i) { return i; }; - auto BigEndianByteAt = [](unsigned BW, unsigned i) { return BW - i - 1; }; + bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian(); + auto MemoryByteOffset = [&] (ByteProvider P) { + assert(P.isMemory() && "Must be a memory byte provider"); + unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits(); + assert(LoadBitWidth % 8 == 0 && + "can only analyze providers for individual bytes not bit"); + unsigned LoadByteWidth = LoadBitWidth / 8; + return IsBigEndianTarget + ? BigEndianByteAt(LoadByteWidth, P.ByteOffset) + : LittleEndianByteAt(LoadByteWidth, P.ByteOffset); + }; Optional Base; SDValue Chain; SmallSet Loads; - LoadSDNode *FirstLoad = nullptr; + Optional FirstByteProvider; + int64_t FirstOffset = INT64_MAX; // Check if all the bytes of the OR we are looking at are loaded from the same // base address. Collect bytes offsets from Base address in ByteOffsets. SmallVector ByteOffsets(ByteWidth); for (unsigned i = 0; i < ByteWidth; i++) { - // All the bytes must be loaded from memory - if (Bytes[i].Kind != ByteProvider::Memory) + auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true); + if (!P || !P->isMemory()) // All the bytes must be loaded from memory return SDValue(); - LoadSDNode *L = Bytes[i].Load; + LoadSDNode *L = P->Load; assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() && - (L->getExtensionType() == ISD::NON_EXTLOAD) && - "Must be enforced by collectByteProviders"); + "Must be enforced by calculateByteProvider"); assert(L->getOffset().isUndef() && "Unindexed load must have undef offset"); // All loads must share the same chain SDValue LChain = L->getChain(); if (!Chain) Chain = LChain; - if (Chain != LChain) + else if (Chain != LChain) return SDValue(); // Loads must share the same base address BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr(), DAG); if (!Base) Base = Ptr; - if (!Base->equalBaseIndex(Ptr)) + else if (!Base->equalBaseIndex(Ptr)) return SDValue(); // Calculate the offset of the current byte from the base address - unsigned LoadBitWidth = L->getMemoryVT().getSizeInBits(); - assert(LoadBitWidth % 8 == 0 && - "can only analyze providers for individual bytes not bit"); - unsigned LoadByteWidth = LoadBitWidth / 8; - int64_t MemoryByteOffset = - DAG.getDataLayout().isBigEndian() - ? 
BigEndianByteAt(LoadByteWidth, Bytes[i].ByteOffset) - : LittleEndianByteAt(LoadByteWidth, Bytes[i].ByteOffset); - int64_t ByteOffsetFromBase = Ptr.Offset + MemoryByteOffset; + int64_t ByteOffsetFromBase = Ptr.Offset + MemoryByteOffset(*P); ByteOffsets[i] = ByteOffsetFromBase; // Remember the first byte load - if (ByteOffsetFromBase == 0) - FirstLoad = L; + if (ByteOffsetFromBase < FirstOffset) { + FirstByteProvider = P; + FirstOffset = ByteOffsetFromBase; + } Loads.insert(L); } - assert(Base && "must be set"); - assert(Loads.size() > 0 && "must be at least one load"); + assert(Loads.size() > 0 && "All the bytes of the value must be loaded from " + "memory, so there must be at least one load which produces the value"); + assert(Base && "Base address of the accessed memory location must be set"); + assert(FirstOffset != INT64_MAX && "First byte offset must be set"); // Check if the bytes of the OR we are looking at match with either big or // little endian value load bool BigEndian = true, LittleEndian = true; for (unsigned i = 0; i < ByteWidth; i++) { - LittleEndian &= ByteOffsets[i] == LittleEndianByteAt(ByteWidth, i); - BigEndian &= ByteOffsets[i] == BigEndianByteAt(ByteWidth, i); + int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset; + LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i); + BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i); if (!BigEndian && !LittleEndian) return SDValue(); } assert((BigEndian != LittleEndian) && "should be either or"); - assert(FirstLoad && "must be set"); + assert(FirstByteProvider && "must be set"); + + // Ensure that the first byte is loaded from zero offset of the first load. + // So the combined value can be loaded from the first load address. + if (MemoryByteOffset(*FirstByteProvider) != 0) + return SDValue(); + LoadSDNode *FirstLoad = FirstByteProvider->Load; // The node we are looking at matches with the pattern, check if we can // replace it with a single load and bswap if needed. // If the load needs byte swap check if the target supports it - bool NeedsBswap = DAG.getDataLayout().isBigEndian() != BigEndian; - if (NeedsBswap && !TLI.isOperationLegal(ISD::BSWAP, VT)) + bool NeedsBswap = IsBigEndianTarget != BigEndian; + + // Before legalize we can introduce illegal bswaps which will be later + // converted to an explicit bswap sequence. This way we end up with a single + // load and byte shuffling instead of several loads and byte shuffling. + if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT)) return SDValue(); // Check that a load of the wide type is both allowed and fast on the target @@ -4624,10 +4802,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { for (LoadSDNode *L : Loads) DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1)); - if (NeedsBswap) - return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad); - else - return NewLoad; + return NeedsBswap ? 
DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
 }
 
 SDValue DAGCombiner::visitXOR(SDNode *N) {
@@ -4667,6 +4842,10 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
   // fold (xor x, 0) -> x
   if (isNullConstant(N1))
     return N0;
+
+  if (SDValue NewSel = foldBinOpIntoSelect(N))
+    return NewSel;
+
   // reassociate xor
   if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
     return RXOR;
@@ -4684,9 +4863,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
     default:
       llvm_unreachable("Unhandled SetCC Equivalent!");
     case ISD::SETCC:
-      return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC);
+      return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
     case ISD::SELECT_CC:
-      return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2),
+      return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
                              N0.getOperand(3), NotCC);
     }
   }
@@ -4825,16 +5004,20 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
   if (!BinOpCst) return SDValue();
 
-  // FIXME: disable this unless the input to the binop is a shift by a constant.
-  // If it is not a shift, it pessimizes some common cases like:
-  //
-  //    void foo(int *X, int i) { X[i & 1235] = 1; }
-  //    int bar(int *X, int i) { return X[i & 255]; }
+  // FIXME: disable this unless the input to the binop is a shift by a constant
+  // or is a copy or select. Enable this in other cases once it is known to be
+  // profitable.
   SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
-  if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
-       BinOpLHSVal->getOpcode() != ISD::SRA &&
-       BinOpLHSVal->getOpcode() != ISD::SRL) ||
-      !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
+  bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
+                 BinOpLHSVal->getOpcode() == ISD::SRA ||
+                 BinOpLHSVal->getOpcode() == ISD::SRL;
+  bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
+                        BinOpLHSVal->getOpcode() == ISD::SELECT;
+
+  if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
+      !isCopyOrSelect)
+    return SDValue();
+
+  if (isCopyOrSelect && N->hasOneUse())
     return SDValue();
 
   EVT VT = N->getValueType(0);
@@ -4950,6 +5133,10 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
   // fold (shl undef, x) -> 0
   if (N0.isUndef())
     return DAG.getConstant(0, SDLoc(N), VT);
+
+  if (SDValue NewSel = foldBinOpIntoSelect(N))
+    return NewSel;
+
   // if (shl x, c) is known to be zero, return 0
   if (DAG.MaskedValueIsZero(SDValue(N, 0),
                             APInt::getAllOnesValue(OpSizeInBits)))
@@ -5085,9 +5272,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
   if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
       isConstantOrConstantVector(N1, /* No Opaques */ true)) {
-    unsigned BitSize = VT.getScalarSizeInBits();
     SDLoc DL(N);
-    SDValue AllBits = DAG.getConstant(APInt::getAllOnesValue(BitSize), DL, VT);
+    SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
     SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
   }
@@ -5154,6 +5340,10 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
   // fold (sra x, 0) -> x
   if (N1C && N1C->isNullValue())
     return N0;
+
+  if (SDValue NewSel = foldBinOpIntoSelect(N))
+    return NewSel;
+
   // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
   // sext_inreg.
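+  // For illustration, with i32 x: (sra (shl x, 24), 24) keeps only the low
+  // byte of x, sign-extended, i.e. sext_inreg(x, i8).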
if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) { @@ -5301,6 +5491,10 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold (srl x, 0) -> x if (N1C && N1C->isNullValue()) return N0; + + if (SDValue NewSel = foldBinOpIntoSelect(N)) + return NewSel; + // if (srl x, c) is known to be zero, return 0 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnesValue(OpSizeInBits))) @@ -5351,9 +5545,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 && isConstantOrConstantVector(N1, /* NoOpaques */ true)) { SDLoc DL(N); - APInt AllBits = APInt::getAllOnesValue(N0.getScalarValueSizeInBits()); SDValue Mask = - DAG.getNode(ISD::SRL, DL, VT, DAG.getConstant(AllBits, DL, VT), N1); + DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1); AddToWorklist(Mask.getNode()); return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask); } @@ -5494,7 +5687,11 @@ SDValue DAGCombiner::visitBSWAP(SDNode *N) { SDValue DAGCombiner::visitBITREVERSE(SDNode *N) { SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + // fold (bitreverse c1) -> c2 + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) + return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0); // fold (bitreverse (bitreverse x)) -> x if (N0.getOpcode() == ISD::BITREVERSE) return N0.getOperand(0); @@ -5588,7 +5785,6 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, } } -// TODO: We should handle other cases of selecting between {-1,0,1} here. SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { SDValue Cond = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -5597,6 +5793,67 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { EVT CondVT = Cond.getValueType(); SDLoc DL(N); + if (!VT.isInteger()) + return SDValue(); + + auto *C1 = dyn_cast(N1); + auto *C2 = dyn_cast(N2); + if (!C1 || !C2) + return SDValue(); + + // Only do this before legalization to avoid conflicting with target-specific + // transforms in the other direction (create a select from a zext/sext). There + // is also a target-independent combine here in DAGCombiner in the other + // direction for (select Cond, -1, 0) when the condition is not i1. + if (CondVT == MVT::i1 && !LegalOperations) { + if (C1->isNullValue() && C2->isOne()) { + // select Cond, 0, 1 --> zext (!Cond) + SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); + if (VT != MVT::i1) + NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond); + return NotCond; + } + if (C1->isNullValue() && C2->isAllOnesValue()) { + // select Cond, 0, -1 --> sext (!Cond) + SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); + if (VT != MVT::i1) + NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond); + return NotCond; + } + if (C1->isOne() && C2->isNullValue()) { + // select Cond, 1, 0 --> zext (Cond) + if (VT != MVT::i1) + Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); + return Cond; + } + if (C1->isAllOnesValue() && C2->isNullValue()) { + // select Cond, -1, 0 --> sext (Cond) + if (VT != MVT::i1) + Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond); + return Cond; + } + + // For any constants that differ by 1, we can transform the select into an + // extend and add. Use a target hook because some targets may prefer to + // transform in the other direction. 
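+    // For illustration (hypothetical constants): select Cond, 5, 4 becomes
+    // add (zext Cond), 4 and select Cond, 4, 5 becomes add (sext Cond), 5,
+    // since zext i1 yields {1, 0} and sext i1 yields {-1, 0}.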
+ if (TLI.convertSelectOfConstantsToMath()) { + if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) { + // select Cond, C1, C1-1 --> add (zext Cond), C1-1 + if (VT != MVT::i1) + Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); + return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); + } + if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) { + // select Cond, C1, C1+1 --> add (sext Cond), C1+1 + if (VT != MVT::i1) + Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond); + return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); + } + } + + return SDValue(); + } + // fold (select Cond, 0, 1) -> (xor Cond, 1) // We can't do this reliably if integer based booleans have different contents // to floating point based booleans. This is because we can't tell whether we @@ -5606,15 +5863,14 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { // undiscoverable (or not reasonably discoverable). For example, it could be // in another basic block or it could require searching a complicated // expression. - if (VT.isInteger() && - (CondVT == MVT::i1 || (CondVT.isInteger() && - TLI.getBooleanContents(false, true) == - TargetLowering::ZeroOrOneBooleanContent && - TLI.getBooleanContents(false, false) == - TargetLowering::ZeroOrOneBooleanContent)) && - isNullConstant(N1) && isOneConstant(N2)) { - SDValue NotCond = DAG.getNode(ISD::XOR, DL, CondVT, Cond, - DAG.getConstant(1, DL, CondVT)); + if (CondVT.isInteger() && + TLI.getBooleanContents(false, true) == + TargetLowering::ZeroOrOneBooleanContent && + TLI.getBooleanContents(false, false) == + TargetLowering::ZeroOrOneBooleanContent && + C1->isNullValue() && C2->isOne()) { + SDValue NotCond = + DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT)); if (VT.bitsEq(CondVT)) return NotCond; return DAG.getZExtOrTrunc(NotCond, DL, VT); @@ -5638,8 +5894,9 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // fold (select false, X, Y) -> Y return !N0C->isNullValue() ? N1 : N2; } - // fold (select C, 1, X) -> (or C, X) - if (VT == MVT::i1 && isOneConstant(N1)) + // fold (select X, X, Y) -> (or X, Y) + // fold (select X, 1, Y) -> (or C, Y) + if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1))) return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); if (SDValue V = foldSelectOfConstants(N)) @@ -5657,16 +5914,9 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { AddToWorklist(NOTNode.getNode()); return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1); } - // fold (select C, X, 0) -> (and C, X) - if (VT == MVT::i1 && isNullConstant(N2)) - return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); - // fold (select X, X, Y) -> (or X, Y) - // fold (select X, 1, Y) -> (or X, Y) - if (VT == MVT::i1 && (N0 == N1 || isOneConstant(N1))) - return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); // fold (select X, Y, X) -> (and X, Y) // fold (select X, Y, 0) -> (and X, Y) - if (VT == MVT::i1 && (N0 == N2 || isNullConstant(N2))) + if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2))) return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); // If we can fold this based on the true/false value, do so. 
@@ -5747,7 +5997,6 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { } // select (xor Cond, 1), X, Y -> select Cond, Y, X - // select (xor Cond, 0), X, Y -> selext Cond, X, Y if (VT0 == MVT::i1) { if (N0->getOpcode() == ISD::XOR) { if (auto *C = dyn_cast(N0->getOperand(1))) { @@ -5755,9 +6004,6 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (C->isOne()) return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N2, N1); - else - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), - Cond0, N1, N2); } } } @@ -6134,7 +6380,7 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { ISD::NON_EXTLOAD, MLD->isExpandingLoad()); Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, - MLD->isExpandingLoad()); + MLD->isExpandingLoad()); MMO = DAG.getMachineFunction(). getMachineMemOperand(MLD->getPointerInfo(), @@ -6560,6 +6806,7 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) { SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + SDLoc DL(N); if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, LegalOperations)) @@ -6568,8 +6815,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // fold (sext (sext x)) -> (sext x) // fold (sext (aext x)) -> (sext x) if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) - return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, - N0.getOperand(0)); + return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0)); if (N0.getOpcode() == ISD::TRUNCATE) { // fold (sext (truncate (load x))) -> (sext (smaller load x)) @@ -6601,12 +6847,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign // bits, just sext from i32. if (NumSignBits > OpBits-MidBits) - return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, Op); + return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op); } else { // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign // bits, just truncate to i32. if (NumSignBits > OpBits-MidBits) - return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); + return DAG.getNode(ISD::TRUNCATE, DL, VT, Op); } // fold (sext (truncate x)) -> (sextinreg x). @@ -6616,7 +6862,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op); else if (OpBits > DestBits) Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op); - return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, Op, + return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op, DAG.getValueType(N0.getValueType())); } } @@ -6636,16 +6882,14 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0)); if (DoXform) { LoadSDNode *LN0 = cast(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, - LN0->getChain(), + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(), LN0->getBasePtr(), N0.getValueType(), LN0->getMemOperand()); CombineTo(N, ExtLoad); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), - ISD::SIGN_EXTEND); + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! 
} } @@ -6663,8 +6907,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) { - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, - LN0->getChain(), + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(), LN0->getBasePtr(), MemVT, LN0->getMemOperand()); CombineTo(N, ExtLoad); @@ -6698,7 +6941,6 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { LN0->getMemOperand()); APInt Mask = cast(N0.getOperand(1))->getAPIntValue(); Mask = Mask.sext(VT.getSizeInBits()); - SDLoc DL(N); SDValue And = DAG.getNode(N0.getOpcode(), DL, VT, ExtLoad, DAG.getConstant(Mask, DL, VT)); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, @@ -6706,24 +6948,27 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { N0.getOperand(0).getValueType(), ExtLoad); CombineTo(N, And); CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, - ISD::SIGN_EXTEND); + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } } if (N0.getOpcode() == ISD::SETCC) { - EVT N0VT = N0.getOperand(0).getValueType(); + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + ISD::CondCode CC = cast(N0.getOperand(2))->get(); + EVT N00VT = N0.getOperand(0).getValueType(); + // sext(setcc) -> sext_in_reg(vsetcc) for vectors. // Only do this before legalize for now. if (VT.isVector() && !LegalOperations && - TLI.getBooleanContents(N0VT) == + TLI.getBooleanContents(N00VT) == TargetLowering::ZeroOrNegativeOneBooleanContent) { // On some architectures (such as SSE/NEON/etc) the SETCC result type is // of the same size as the compared operands. Only optimize sext(setcc()) // if this is the case. - EVT SVT = getSetCCResultType(N0VT); + EVT SVT = getSetCCResultType(N00VT); // We know that the # elements of the results is the same as the // # elements of the compare (and the # elements of the compare result @@ -6731,19 +6976,15 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // we know that the element size of the sext'd result matches the // element size of the compare operands. if (VT.getSizeInBits() == SVT.getSizeInBits()) - return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), - N0.getOperand(1), - cast(N0.getOperand(2))->get()); + return DAG.getSetCC(DL, VT, N00, N01, CC); // If the desired elements are smaller or larger than the source - // elements we can use a matching integer vector type and then - // truncate/sign extend - EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger(); - if (SVT == MatchingVectorType) { - SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType, - N0.getOperand(0), N0.getOperand(1), - cast(N0.getOperand(2))->get()); - return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT); + // elements, we can use a matching integer vector type and then + // truncate/sign extend. + EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger(); + if (SVT == MatchingVecType) { + SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC); + return DAG.getSExtOrTrunc(VsetCC, DL, VT); } } @@ -6752,36 +6993,30 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // getBooleanContents(). unsigned SetCCWidth = N0.getScalarValueSizeInBits(); - SDLoc DL(N); // To determine the "true" side of the select, we need to know the high bit // of the value returned by the setcc if it evaluates to true. 
// If the type of the setcc is i1, then the true case of the select is just // sext(i1 1), that is, -1. // If the type of the setcc is larger (say, i8) then the value of the high - // bit depends on getBooleanContents(). So, ask TLI for a real "true" value + // bit depends on getBooleanContents(), so ask TLI for a real "true" value // of the appropriate width. - SDValue ExtTrueVal = - (SetCCWidth == 1) - ? DAG.getConstant(APInt::getAllOnesValue(VT.getScalarSizeInBits()), - DL, VT) - : TLI.getConstTrueVal(DAG, VT, DL); - - if (SDValue SCC = SimplifySelectCC( - DL, N0.getOperand(0), N0.getOperand(1), ExtTrueVal, - DAG.getConstant(0, DL, VT), - cast(N0.getOperand(2))->get(), true)) + SDValue ExtTrueVal = (SetCCWidth == 1) ? DAG.getAllOnesConstant(DL, VT) + : TLI.getConstTrueVal(DAG, VT, DL); + SDValue Zero = DAG.getConstant(0, DL, VT); + if (SDValue SCC = + SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true)) return SCC; if (!VT.isVector()) { - EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType()); - if (!LegalOperations || - TLI.isOperationLegal(ISD::SETCC, N0.getOperand(0).getValueType())) { - SDLoc DL(N); - ISD::CondCode CC = cast(N0.getOperand(2))->get(); - SDValue SetCC = - DAG.getSetCC(DL, SetCCVT, N0.getOperand(0), N0.getOperand(1), CC); - return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, - DAG.getConstant(0, DL, VT)); + EVT SetCCVT = getSetCCResultType(N00VT); + // Don't do this transform for i1 because there's a select transform + // that would reverse it. + // TODO: We should not do this transform at all without a target hook + // because a sext is likely cheaper than a select? + if (SetCCVT.getScalarSizeInBits() != 1 && + (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) { + SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC); + return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero); } } } @@ -6789,7 +7024,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // fold (sext x) -> (zext x) if the sign bit is known zero. if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0); + return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0); return SDValue(); } @@ -7278,9 +7513,25 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitAssertZext(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT EVT = cast(N1)->getVT(); + + // fold (assertzext (assertzext x, vt), vt) -> (assertzext x, vt) + if (N0.getOpcode() == ISD::AssertZext && + EVT == cast(N0.getOperand(1))->getVT()) + return N0; + + return SDValue(); +} + /// See if the specified operand can be simplified with the knowledge that only /// the bits specified by Mask are used. If so, return the simpler operand, /// otherwise return a null SDValue. +/// +/// (This exists alongside SimplifyDemandedBits because GetDemandedBits can +/// simplify nodes with multiple uses more aggressively.) SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { switch (V.getOpcode()) { default: break; @@ -7316,6 +7567,14 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(), SimplifyLHS, V.getOperand(1)); } + break; + case ISD::AND: { + // X & -1 -> X (ignoring bits which aren't demanded). 
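+    // For illustration: with Mask == 0xFF and an AND constant of 0xFFFF,
+    // (x & 0xFFFF) agrees with x on every demanded bit, so x can be used
+    // directly.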
+ ConstantSDNode *AndVal = isConstOrConstSplat(V.getOperand(1)); + if (AndVal && (AndVal->getAPIntValue() & Mask) == Mask) + return V.getOperand(0); + break; + } } return SDValue(); } @@ -7539,6 +7798,16 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00); } + // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_in_reg x) + if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG || + N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG || + N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) && + N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) { + if (!LegalOperations || + TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)) + return DAG.getSignExtendVectorInReg(N0.getOperand(0), SDLoc(N), VT); + } + // fold (sext_in_reg (zext x)) -> (sext x) // iff we are extending the source sign bit. if (N0.getOpcode() == ISD::ZERO_EXTEND) { @@ -7549,7 +7818,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { } // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero. - if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits))) + if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1))) return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType()); // fold operands of sext_in_reg based on knowledge that the top bits are not @@ -7791,6 +8060,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { VT.getSizeInBits()))) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter); } + // fold (truncate (load x)) -> (smaller load x) // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) { @@ -7812,6 +8082,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { } } } + // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)), // where ... are all 'undef'. if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) { @@ -7877,6 +8148,18 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); + // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry) + // When the adde's carry is not used. + if (N0.getOpcode() == ISD::ADDE && N0.hasOneUse() && + !N0.getNode()->hasAnyUseOfValue(1) && + (!LegalOperations || TLI.isOperationLegal(ISD::ADDE, VT))) { + SDLoc SL(N); + auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0)); + auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1)); + return DAG.getNode(ISD::ADDE, SL, DAG.getVTList(VT, MVT::Glue), + X, Y, N0.getOperand(2)); + } + return SDValue(); } @@ -8418,10 +8701,14 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { } // More folding opportunities when target permits. - if ((AllowFusion || HasFMAD) && Aggressive) { + if (Aggressive) { // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) - if (N0.getOpcode() == PreferredFusedOpcode && - N0.getOperand(2).getOpcode() == ISD::FMUL) { + // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF + // are currently only supported on binary nodes. 
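+    // For illustration: fadd (fma x, y, (fmul u, v)), z re-associates to
+    // fma x, y, (fma u, v, z); re-associating FP additions is only
+    // value-safe under unsafe math, hence the UnsafeFPMath guard below.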
+ if (Options.UnsafeFPMath && + N0.getOpcode() == PreferredFusedOpcode && + N0.getOperand(2).getOpcode() == ISD::FMUL && + N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) { return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1), DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -8431,8 +8718,12 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { } // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x)) - if (N1->getOpcode() == PreferredFusedOpcode && - N1.getOperand(2).getOpcode() == ISD::FMUL) { + // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF + // are currently only supported on binary nodes. + if (Options.UnsafeFPMath && + N1->getOpcode() == PreferredFusedOpcode && + N1.getOperand(2).getOpcode() == ISD::FMUL && + N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) { return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0), N1.getOperand(1), DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -8660,11 +8951,15 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { } // More folding opportunities when target permits. - if ((AllowFusion || HasFMAD) && Aggressive) { + if (Aggressive) { // fold (fsub (fma x, y, (fmul u, v)), z) // -> (fma x, y (fma u, v, (fneg z))) - if (N0.getOpcode() == PreferredFusedOpcode && - N0.getOperand(2).getOpcode() == ISD::FMUL) { + // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF + // are currently only supported on binary nodes. + if (Options.UnsafeFPMath && + N0.getOpcode() == PreferredFusedOpcode && + N0.getOperand(2).getOpcode() == ISD::FMUL && + N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) { return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1), DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -8676,7 +8971,10 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // fold (fsub x, (fma y, z, (fmul u, v))) // -> (fma (fneg y), z, (fma (fneg u), v, x)) - if (N1.getOpcode() == PreferredFusedOpcode && + // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF + // are currently only supported on binary nodes. + if (Options.UnsafeFPMath && + N1.getOpcode() == PreferredFusedOpcode && N1.getOperand(2).getOpcode() == ISD::FMUL) { SDValue N20 = N1.getOperand(2).getOperand(0); SDValue N21 = N1.getOperand(2).getOperand(1); @@ -8904,6 +9202,9 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { if (N0CFP && !N1CFP) return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags); + if (SDValue NewSel = foldBinOpIntoSelect(N)) + return NewSel; + // fold (fadd A, (fneg B)) -> (fsub A, B) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2) @@ -8917,7 +9218,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { GetNegatedExpression(N0, DAG, LegalOperations), Flags); // FIXME: Auto-upgrade the target/function-level option. 
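+  // For illustration of why the signed-zero guard matters: fadd A, 0.0 is
+  // not an identity when A is -0.0, since -0.0 + 0.0 == +0.0 in IEEE-754,
+  // so these folds need no-signed-zeros globally or on the node's flags.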
- if (Options.UnsafeFPMath || N->getFlags()->hasNoSignedZeros()) { + if (Options.NoSignedZerosFPMath || N->getFlags()->hasNoSignedZeros()) { // fold (fadd A, 0) -> A if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1)) if (N1C->isZero()) @@ -9051,13 +9352,16 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if (N0CFP && N1CFP) return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags); + if (SDValue NewSel = foldBinOpIntoSelect(N)) + return NewSel; + // fold (fsub A, (fneg B)) -> (fadd A, B) if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) return DAG.getNode(ISD::FADD, DL, VT, N0, GetNegatedExpression(N1, DAG, LegalOperations), Flags); // FIXME: Auto-upgrade the target/function-level option. - if (Options.UnsafeFPMath || N->getFlags()->hasNoSignedZeros()) { + if (Options.NoSignedZerosFPMath || N->getFlags()->hasNoSignedZeros()) { // (fsub 0, B) -> -B if (N0CFP && N0CFP->isZero()) { if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) @@ -9130,6 +9434,9 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (N1CFP && N1CFP->isExactlyValue(1.0)) return N0; + if (SDValue NewSel = foldBinOpIntoSelect(N)) + return NewSel; + if (Options.UnsafeFPMath) { // fold (fmul A, 0) -> 0 if (N1CFP && N1CFP->isZero()) @@ -9384,6 +9691,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (N0CFP && N1CFP) return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags); + if (SDValue NewSel = foldBinOpIntoSelect(N)) + return NewSel; + if (Options.UnsafeFPMath) { // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. if (N1CFP) { @@ -9487,6 +9797,9 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, &cast(N)->Flags); + if (SDValue NewSel = foldBinOpIntoSelect(N)) + return NewSel; + return SDValue(); } @@ -12724,8 +13037,15 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize) return SDValue(); - if (!TLI.isMultiStoresCheaperThanBitsMerge(Lo.getOperand(0), - Hi.getOperand(0))) + // Use the EVT of low and high parts before bitcast as the input + // of target query. + EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST) + ? Lo.getOperand(0).getValueType() + : Lo.getValueType(); + EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST) + ? Hi.getOperand(0).getValueType() + : Hi.getValueType(); + if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy)) return SDValue(); // Start to split store. @@ -12766,10 +13086,6 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { EVT VT = InVec.getValueType(); - // If we can't generate a legal BUILD_VECTOR, exit - if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) - return SDValue(); - // Check that we know which element is being inserted if (!isa(EltNo)) return SDValue(); @@ -12796,6 +13112,10 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { } } + // If we can't generate a legal BUILD_VECTOR, exit + if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) + return SDValue(); + // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially // be converted to a BUILD_VECTOR). Fill in the Ops vector with the // vector elements. @@ -12817,11 +13137,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { // All the operands of BUILD_VECTOR must have the same type; // we enforce that here. EVT OpVT = Ops[0].getValueType(); - if (InVal.getValueType() != OpVT) - InVal = OpVT.bitsGT(InVal.getValueType()) ? 
- DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) : - DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal); - Ops[Elt] = InVal; + Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal; } // Return the new vector @@ -12841,6 +13157,11 @@ SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad( if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT)) return SDValue(); + ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ? + ISD::NON_EXTLOAD : ISD::EXTLOAD; + if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT)) + return SDValue(); + Align = NewAlign; SDValue NewPtr = OriginalLoad->getBasePtr(); @@ -13295,7 +13616,7 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { return DAG.getNode(Opcode, DL, VT, BV); } -SDValue DAGCombiner::createBuildVecShuffle(SDLoc DL, SDNode *N, +SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N, ArrayRef VectorMask, SDValue VecIn1, SDValue VecIn2, unsigned LeftIdx) { @@ -13345,9 +13666,15 @@ SDValue DAGCombiner::createBuildVecShuffle(SDLoc DL, SDNode *N, !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1)) return SDValue(); - if (InVT1 != InVT2) + // Legalizing INSERT_SUBVECTOR is tricky - you basically have to + // lower it back into a BUILD_VECTOR. So if the inserted type is + // illegal, don't even try. + if (InVT1 != InVT2) { + if (!TLI.isTypeLegal(InVT2)) + return SDValue(); VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1, DAG.getUNDEF(InVT1), VecIn2, ZeroIdx); + } ShuffleNumElems = NumElems * 2; } else { // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider @@ -13878,15 +14205,19 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { EVT NVT = N->getValueType(0); SDValue V = N->getOperand(0); - if (V->getOpcode() == ISD::CONCAT_VECTORS) { - // Combine: - // (extract_subvec (concat V1, V2, ...), i) - // Into: - // Vi if possible - // Only operand 0 is checked as 'concat' assumes all inputs of the same - // type. - if (V->getOperand(0).getValueType() != NVT) - return SDValue(); + // Extract from UNDEF is UNDEF. + if (V.isUndef()) + return DAG.getUNDEF(NVT); + + // Combine: + // (extract_subvec (concat V1, V2, ...), i) + // Into: + // Vi if possible + // Only operand 0 is checked as 'concat' assumes all inputs of the same + // type. + if (V->getOpcode() == ISD::CONCAT_VECTORS && + isa(N->getOperand(1)) && + V->getOperand(0).getValueType() == NVT) { unsigned Idx = N->getConstantOperandVal(1); unsigned NumElems = NVT.getVectorNumElements(); assert((Idx % NumElems) == 0 && @@ -13900,19 +14231,16 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { if (V->getOpcode() == ISD::INSERT_SUBVECTOR) { // Handle only simple case where vector being inserted and vector - // being extracted are of same type, and are half size of larger vectors. - EVT BigVT = V->getOperand(0).getValueType(); + // being extracted are of same size. EVT SmallVT = V->getOperand(1).getValueType(); - if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits()) + if (!NVT.bitsEq(SmallVT)) return SDValue(); - // Only handle cases where both indexes are constants with the same type. + // Only handle cases where both indexes are constants. 
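Earlier in this hunk, the (fadd A, 0) -> A and (fsub 0, B) -> -B folds moved from Options.UnsafeFPMath to the narrower Options.NoSignedZerosFPMath. Signed zeros are the only thing those folds can break, as this standalone sketch (not part of the patch) shows:

#include <cmath>
#include <cstdio>

int main() {
  float A = -0.0f;
  // Under default rounding, -0.0 + 0.0 is +0.0, so (fadd A, 0) -> A would
  // flip the sign bit of the result when A is a negative zero.
  std::printf("A+0: signbit=%d, A: signbit=%d\n",
              std::signbit(A + 0.0f), std::signbit(A));
  return 0;
}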
ConstantSDNode *ExtIdx = dyn_cast(N->getOperand(1)); ConstantSDNode *InsIdx = dyn_cast(V->getOperand(2)); - if (InsIdx && ExtIdx && - InsIdx->getValueType(0).getSizeInBits() <= 64 && - ExtIdx->getValueType(0).getSizeInBits() <= 64) { + if (InsIdx && ExtIdx) { // Combine: // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) // Into: @@ -14083,16 +14411,20 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { // Attempt to combine a shuffle of 2 inputs of 'scalar sources' - // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. -// This combine is done in the following cases: -// 1. Both N0,N1 are BUILD_VECTOR's composed of constants or undefs. -// 2. Only one of N0,N1 is a BUILD_VECTOR composed of constants or undefs - -// Combine iff that node is ALL_ZEROS. We prefer not to combine a -// BUILD_VECTOR of all constants to allow efficient materialization of -// constant vectors, but the ALL_ZEROS is an exception because -// zero-extension matching seems to rely on having BUILD_VECTOR nodes with -// zero padding between elements. FIXME: Eliminate this exception for -// ALL_ZEROS constant vectors. -// 3. Neither N0,N1 are composed of only constants. +// +// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always +// a simplification in some sense, but it isn't appropriate in general: some +// BUILD_VECTORs are substantially cheaper than others. The general case +// of a BUILD_VECTOR requires inserting each element individually (or +// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of +// all constants is a single constant pool load. A BUILD_VECTOR where each +// element is identical is a splat. A BUILD_VECTOR where most of the operands +// are undef lowers to a small number of element insertions. +// +// To deal with this, we currently use a bunch of mostly arbitrary heuristics. +// We don't fold shuffles where one side is a non-zero constant, and we don't +// fold shuffles if the resulting BUILD_VECTOR would have duplicate +// non-constant operands. This seems to work out reasonably well in practice. static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI) { @@ -14115,6 +14447,7 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, } SmallVector Ops; + SmallSet DuplicateOps; for (int M : SVN->getMask()) { SDValue Op = DAG.getUNDEF(VT.getScalarType()); if (M >= 0) { @@ -14130,6 +14463,14 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, return SDValue(); } } + + // Don't duplicate a non-constant BUILD_VECTOR operand; semantically, this is + // fine, but it's likely to generate low-quality code if the target can't + // reconstruct an appropriate shuffle. + if (!Op.isUndef() && !isa(Op) && !isa(Op)) + if (!DuplicateOps.insert(Op).second) + return SDValue(); + Ops.push_back(Op); } // BUILD_VECTOR requires all inputs to be of the same type, find the @@ -14146,6 +14487,113 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, return DAG.getBuildVector(VT, SDLoc(SVN), Ops); } +// Match shuffles that can be converted to any_vector_extend_in_reg. +// This is often generated during legalization. +// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)) +// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case. 
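The SmallSet guard added to combineShuffleOfScalars above rejects folds that would have to repeat a non-constant operand in the new BUILD_VECTOR. The same test on plain integers, as an illustrative sketch with hypothetical names:

#include <set>
#include <vector>

// Returns true if folding the shuffle would duplicate a non-constant source
// element; constant and undef (-1) entries are allowed to repeat.
bool wouldDuplicateNonConstant(const std::vector<int> &Mask,
                               const std::set<int> &ConstantElts) {
  std::set<int> Seen;
  for (int M : Mask) {
    if (M < 0 || ConstantElts.count(M))
      continue;
    if (!Seen.insert(M).second)
      return true; // same non-constant element used twice
  }
  return false;
}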
+SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN, + SelectionDAG &DAG, + const TargetLowering &TLI, + bool LegalOperations) { + EVT VT = SVN->getValueType(0); + bool IsBigEndian = DAG.getDataLayout().isBigEndian(); + + // TODO Add support for big-endian when we have a test case. + if (!VT.isInteger() || IsBigEndian) + return SDValue(); + + unsigned NumElts = VT.getVectorNumElements(); + unsigned EltSizeInBits = VT.getScalarSizeInBits(); + ArrayRef Mask = SVN->getMask(); + SDValue N0 = SVN->getOperand(0); + + // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32)) + auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) { + for (unsigned i = 0; i != NumElts; ++i) { + if (Mask[i] < 0) + continue; + if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale)) + continue; + return false; + } + return true; + }; + + // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for + // power-of-2 extensions as they are the most likely. + for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) { + if (!isAnyExtend(Scale)) + continue; + + EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale); + EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale); + if (!LegalOperations || + TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT)) + return DAG.getBitcast(VT, + DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT)); + } + + return SDValue(); +} + +// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of +// each source element of a large type into the lowest elements of a smaller +// destination type. This is often generated during legalization. +// If the source node itself was a '*_extend_vector_inreg' node then we should +// then be able to remove it. +SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG) { + EVT VT = SVN->getValueType(0); + bool IsBigEndian = DAG.getDataLayout().isBigEndian(); + + // TODO Add support for big-endian when we have a test case. + if (!VT.isInteger() || IsBigEndian) + return SDValue(); + + SDValue N0 = SVN->getOperand(0); + while (N0.getOpcode() == ISD::BITCAST) + N0 = N0.getOperand(0); + + unsigned Opcode = N0.getOpcode(); + if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG && + Opcode != ISD::SIGN_EXTEND_VECTOR_INREG && + Opcode != ISD::ZERO_EXTEND_VECTOR_INREG) + return SDValue(); + + SDValue N00 = N0.getOperand(0); + ArrayRef Mask = SVN->getMask(); + unsigned NumElts = VT.getVectorNumElements(); + unsigned EltSizeInBits = VT.getScalarSizeInBits(); + unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits(); + + // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1> + // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1> + // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1> + auto isTruncate = [&Mask, &NumElts](unsigned Scale) { + for (unsigned i = 0; i != NumElts; ++i) { + if (Mask[i] < 0) + continue; + if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale)) + continue; + return false; + } + return true; + }; + + // At the moment we just handle the case where we've truncated back to the + // same size as before the extension. + // TODO: handle more extension/truncation cases as cases arise. + if (EltSizeInBits != ExtSrcSizeInBits) + return SDValue(); + + // Attempt to match a 'truncate_vector_inreg' shuffle, we just search for + // power-of-2 truncations as they are the most likely. 
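The isAnyExtend predicate above accepts masks whose defined entries place element i/Scale at every position i divisible by Scale, with undef entries acting as wildcards. A standalone copy of that check, runnable outside the DAG (names hypothetical):

#include <cstdio>
#include <vector>

bool isAnyExtendMask(const std::vector<int> &Mask, unsigned Scale) {
  for (unsigned i = 0; i != Mask.size(); ++i) {
    if (Mask[i] < 0)
      continue; // undef entry: matches anything
    if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
      continue;
    return false;
  }
  return true;
}

int main() {
  std::printf("%d\n", isAnyExtendMask({0, -1, 1, -1}, 2)); // 1: v4i32 -> v2i64
  std::printf("%d\n", isAnyExtendMask({0, 1, 2, 3}, 2));   // 0: identity mask
  return 0;
}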
+ for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) + if (isTruncate(Scale)) + return DAG.getBitcast(VT, N00); + + return SDValue(); +} + SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { EVT VT = N->getValueType(0); unsigned NumElts = VT.getVectorNumElements(); @@ -14250,6 +14698,14 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG)) return S; + // Match shuffles that can be converted to any_vector_extend_in_reg. + if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations)) + return V; + + // Combine "truncate_vector_in_reg" style shuffles. + if (SDValue V = combineTruncationShuffle(SVN, DAG)) + return V; + if (N0.getOpcode() == ISD::CONCAT_VECTORS && Level < AfterLegalizeVectorOps && (N1.isUndef() || @@ -14371,6 +14827,11 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) { ShuffleVectorSDNode *OtherSV = cast(N0); + // Don't try to fold splats; they're likely to simplify somehow, or they + // might be free. + if (OtherSV->isSplat()) + return SDValue(); + // The incoming shuffle must be of the same type as the result of the // current shuffle. assert(OtherSV->getOperand(0).getValueType() == VT && @@ -14502,6 +14963,16 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); + // If inserting an UNDEF, just return the original vector. + if (N1.isUndef()) + return N0; + + // If this is an insert of an extracted vector into an undef vector, we can + // just use the input to the extract. + if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && + N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT) + return N1.getOperand(0); + // Combine INSERT_SUBVECTORs where we are inserting to the same index. // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx ) // --> INSERT_SUBVECTOR( Vec, SubNew, Idx ) @@ -14511,26 +14982,39 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0), N1, N2); - if (N0.getValueType() != N1.getValueType()) + if (!isa(N2)) return SDValue(); + unsigned InsIdx = cast(N2)->getZExtValue(); + + // Canonicalize insert_subvector dag nodes. + // Example: + // (insert_subvector (insert_subvector A, Idx0), Idx1) + // -> (insert_subvector (insert_subvector A, Idx1), Idx0) + if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() && + N1.getValueType() == N0.getOperand(1).getValueType() && + isa(N0.getOperand(2))) { + unsigned OtherIdx = cast(N0.getOperand(2))->getZExtValue(); + if (InsIdx < OtherIdx) { + // Swap nodes. + SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, + N0.getOperand(0), N1, N2); + AddToWorklist(NewOp.getNode()); + return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()), + VT, NewOp, N0.getOperand(1), N0.getOperand(2)); + } + } + // If the input vector is a concatenation, and the insert replaces - // one of the halves, we can optimize into a single concat_vectors. 
- if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0->getNumOperands() == 2 && - N2.getOpcode() == ISD::Constant) { - APInt InsIdx = cast(N2)->getAPIntValue(); - - // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) -> - // (concat_vectors Z, Y) - if (InsIdx == 0) - return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N1, - N0.getOperand(1)); - - // Upper half: fold (insert_subvector (concat_vectors X, Y), Z) -> - // (concat_vectors X, Z) - if (InsIdx == VT.getVectorNumElements() / 2) - return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0.getOperand(0), - N1); + // one of the pieces, we can optimize into a single concat_vectors. + if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() && + N0.getOperand(0).getValueType() == N1.getValueType()) { + unsigned Factor = N1.getValueType().getVectorNumElements(); + + SmallVector Ops(N0->op_begin(), N0->op_end()); + Ops[cast(N2)->getZExtValue() / Factor] = N1; + + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops); } return SDValue(); diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index e2f33bb..fcd77a8 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1,4 +1,4 @@ -//===-- FastISel.cpp - Implementation of the FastISel class ---------------===// +//===- FastISel.cpp - Implementation of the FastISel class ----------------===// // // The LLVM Compiler Infrastructure // @@ -39,35 +39,76 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BranchProbabilityInfo.h" -#include "llvm/Analysis/Loads.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" -#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Mangler.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" +#include 
"llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include +#include +#include +#include +#include + using namespace llvm; #define DEBUG_TYPE "isel" @@ -646,7 +687,7 @@ bool FastISel::selectStackmap(const CallInst *I) { MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::STACKMAP)); for (auto const &MO : Ops) - MIB.addOperand(MO); + MIB.add(MO); // Issue CALLSEQ_END unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); @@ -826,7 +867,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) { TII.get(TargetOpcode::PATCHPOINT)); for (auto &MO : Ops) - MIB.addOperand(MO); + MIB.add(MO); MIB->setPhysRegsDeadExcept(CLI.InRegs, TRI); @@ -1149,7 +1190,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { } else BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::DBG_VALUE)) - .addOperand(*Op) + .add(*Op) .addImm(0) .addMetadata(DI->getVariable()) .addMetadata(DI->getExpression()); @@ -1362,7 +1403,7 @@ bool FastISel::selectInstruction(const Instruction *I) { if (const auto *Call = dyn_cast(I)) { const Function *F = Call->getCalledFunction(); - LibFunc::Func Func; + LibFunc Func; // As a special case, don't handle calls to builtin library functions that // may be translated directly to target instructions. @@ -1665,7 +1706,7 @@ FastISel::FastISel(FunctionLoweringInfo &FuncInfo, TRI(*MF->getSubtarget().getRegisterInfo()), LibInfo(LibInfo), SkipTargetIndependentISel(SkipTargetIndependentISel) {} -FastISel::~FastISel() {} +FastISel::~FastISel() = default; bool FastISel::fastLowerArguments() { return false; } diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 7c814bf..377a523 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -125,11 +125,9 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // Initialize the mapping of values to registers. This is only set up for // instruction values that are used outside of the block that defines // them. - Function::const_iterator BB = Fn->begin(), EB = Fn->end(); - for (; BB != EB; ++BB) - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); - I != E; ++I) { - if (const AllocaInst *AI = dyn_cast(I)) { + for (const BasicBlock &BB : *Fn) { + for (const Instruction &I : BB) { + if (const AllocaInst *AI = dyn_cast(&I)) { Type *Ty = AI->getAllocatedType(); unsigned Align = std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(Ty), @@ -138,7 +136,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // Static allocas can be folded into the initial stack frame // adjustment. For targets that don't realign the stack, don't // do this if there is an extra alignment requirement. - if (AI->isStaticAlloca() && + if (AI->isStaticAlloca() && (TFI->isStackRealignable() || (Align <= StackAlign))) { const ConstantInt *CUI = cast(AI->getArraySize()); uint64_t TySize = MF->getDataLayout().getTypeAllocSize(Ty); @@ -175,14 +173,13 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // Look for inline asm that clobbers the SP register. 
if (isa(I) || isa(I)) { - ImmutableCallSite CS(&*I); + ImmutableCallSite CS(&I); if (isa(CS.getCalledValue())) { unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); std::vector Ops = TLI->ParseConstraints(Fn->getParent()->getDataLayout(), TRI, CS); - for (size_t I = 0, E = Ops.size(); I != E; ++I) { - TargetLowering::AsmOperandInfo &Op = Ops[I]; + for (TargetLowering::AsmOperandInfo &Op : Ops) { if (Op.Type == InlineAsm::isClobber) { // Clobbers don't have SDValue operands, hence SDValue(). TLI->ComputeConstraintToUse(Op, SDValue(), DAG); @@ -199,28 +196,28 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // Look for calls to the @llvm.va_start intrinsic. We can omit some // prologue boilerplate for variadic functions that don't examine their // arguments. - if (const auto *II = dyn_cast(I)) { + if (const auto *II = dyn_cast(&I)) { if (II->getIntrinsicID() == Intrinsic::vastart) MF->getFrameInfo().setHasVAStart(true); } // If we have a musttail call in a variadic function, we need to ensure we // forward implicit register parameters. - if (const auto *CI = dyn_cast(I)) { + if (const auto *CI = dyn_cast(&I)) { if (CI->isMustTailCall() && Fn->isVarArg()) MF->getFrameInfo().setHasMustTailInVarArgFunc(true); } // Mark values used outside their block as exported, by allocating // a virtual register for them. - if (isUsedOutsideOfDefiningBlock(&*I)) - if (!isa(I) || !StaticAllocaMap.count(cast(I))) - InitializeRegForValue(&*I); + if (isUsedOutsideOfDefiningBlock(&I)) + if (!isa(I) || !StaticAllocaMap.count(cast(&I))) + InitializeRegForValue(&I); // Collect llvm.dbg.declare information. This is done now instead of // during the initial isel pass through the IR so that it is done // in a predictable order. - if (const DbgDeclareInst *DI = dyn_cast(I)) { + if (const DbgDeclareInst *DI = dyn_cast(&I)) { assert(DI->getVariable() && "Missing variable"); assert(DI->getDebugLoc() && "Missing location"); if (MMI.hasDebugInfo()) { @@ -245,47 +242,52 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, } // Decide the preferred extend type for a value. - PreferredExtendType[&*I] = getPreferredExtendForValue(&*I); + PreferredExtendType[&I] = getPreferredExtendForValue(&I); } + } // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This // also creates the initial PHI MachineInstrs, though none of the input // operands are populated. - for (BB = Fn->begin(); BB != EB; ++BB) { + for (const BasicBlock &BB : *Fn) { // Don't create MachineBasicBlocks for imaginary EH pad blocks. These blocks // are really data, and no instructions can live here. - if (BB->isEHPad()) { - const Instruction *I = BB->getFirstNonPHI(); + if (BB.isEHPad()) { + const Instruction *PadInst = BB.getFirstNonPHI(); // If this is a non-landingpad EH pad, mark this function as using // funclets. // FIXME: SEH catchpads do not create funclets, so we could avoid setting // this in such cases in order to improve frame layout. 
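The FunctionLoweringInfo hunks here are a mechanical conversion from explicit iterator loops over Fn and BB to range-based for, with &*I becoming &I. The pattern on a toy container, for reference (sketch only):

#include <vector>

int sumOld(const std::vector<int> &V) {
  int S = 0;
  for (std::vector<int>::const_iterator I = V.begin(), E = V.end(); I != E; ++I)
    S += *I; // element reached through the iterator; its address is &*I
  return S;
}

int sumNew(const std::vector<int> &V) {
  int S = 0;
  for (const int &I : V)
    S += I; // same iteration, and &I replaces &*I
  return S;
}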
- if (!isa(I)) { + if (!isa(PadInst)) { MF->setHasEHFunclets(true); MF->getFrameInfo().setHasOpaqueSPAdjustment(true); } - if (isa(I)) { - assert(&*BB->begin() == I && + if (isa(PadInst)) { + assert(&*BB.begin() == PadInst && "WinEHPrepare failed to remove PHIs from imaginary BBs"); continue; } - if (isa(I)) - assert(&*BB->begin() == I && "WinEHPrepare failed to demote PHIs"); + if (isa(PadInst)) + assert(&*BB.begin() == PadInst && "WinEHPrepare failed to demote PHIs"); } - MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(&*BB); - MBBMap[&*BB] = MBB; + MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(&BB); + MBBMap[&BB] = MBB; MF->push_back(MBB); // Transfer the address-taken flag. This is necessary because there could // be multiple MachineBasicBlocks corresponding to one BasicBlock, and only // the first one should be marked. - if (BB->hasAddressTaken()) + if (BB.hasAddressTaken()) MBB->setHasAddressTaken(); + // Mark landing pad blocks. + if (BB.isEHPad()) + MBB->setIsEHPad(); + // Create Machine PHI nodes for LLVM PHI nodes, lowering them as // appropriate. - for (BasicBlock::const_iterator I = BB->begin(); + for (BasicBlock::const_iterator I = BB.begin(); const PHINode *PN = dyn_cast(I); ++I) { if (PN->use_empty()) continue; @@ -299,8 +301,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, SmallVector ValueVTs; ComputeValueVTs(*TLI, MF->getDataLayout(), PN->getType(), ValueVTs); - for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { - EVT VT = ValueVTs[vti]; + for (EVT VT : ValueVTs) { unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); for (unsigned i = 0; i != NumRegisters; ++i) @@ -310,16 +311,6 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, } } - // Mark landing pad blocks. - SmallVector LPads; - for (BB = Fn->begin(); BB != EB; ++BB) { - const Instruction *FNP = BB->getFirstNonPHI(); - if (BB->isEHPad() && MBBMap.count(&*BB)) - MBBMap[&*BB]->setIsEHPad(); - if (const auto *LPI = dyn_cast(FNP)) - LPads.push_back(LPI); - } - if (!isFuncletEHPersonality(Personality)) return; diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 14d1481..242aca0 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -235,7 +235,6 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, if (II.OpInfo[i].isOptionalDef()) { // Optional def must be a physical register. 
- unsigned NumResults = CountResults(Node); VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg(); assert(TargetRegisterInfo::isPhysicalRegister(VRBase)); MIB.addReg(VRBase, RegState::Define); @@ -502,8 +501,17 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, const TargetRegisterClass *TRC = TLI->getRegClassFor(Node->getSimpleValueType(0)); - unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); - MachineInstr *DefMI = MRI->getVRegDef(VReg); + unsigned Reg; + MachineInstr *DefMI; + RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(0)); + if (R && TargetRegisterInfo::isPhysicalRegister(R->getReg())) { + Reg = R->getReg(); + DefMI = nullptr; + } else { + Reg = getVR(Node->getOperand(0), VRBaseMap); + DefMI = MRI->getVRegDef(Reg); + } + unsigned SrcReg, DstReg, DefSubIdx; if (DefMI && TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) && @@ -519,20 +527,26 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg); MRI->clearKillFlags(SrcReg); } else { - // VReg may not support a SubIdx sub-register, and we may need to + // Reg may not support a SubIdx sub-register, and we may need to // constrain its register class or issue a COPY to a compatible register // class. - VReg = ConstrainForSubReg(VReg, SubIdx, - Node->getOperand(0).getSimpleValueType(), - Node->getDebugLoc()); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + Reg = ConstrainForSubReg(Reg, SubIdx, + Node->getOperand(0).getSimpleValueType(), + Node->getDebugLoc()); // Create the destreg if it is missing. if (VRBase == 0) VRBase = MRI->createVirtualRegister(TRC); // Create the extract_subreg machine instruction. - BuildMI(*MBB, InsertPos, Node->getDebugLoc(), - TII->get(TargetOpcode::COPY), VRBase).addReg(VReg, 0, SubIdx); + MachineInstrBuilder CopyMI = + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), + TII->get(TargetOpcode::COPY), VRBase); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + CopyMI.addReg(Reg, 0, SubIdx); + else + CopyMI.addReg(TRI->getSubReg(Reg, SubIdx)); } } else if (Opc == TargetOpcode::INSERT_SUBREG || Opc == TargetOpcode::SUBREG_TO_REG) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 79c1c67..66f981c 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -331,8 +331,6 @@ SDValue SelectionDAGLegalize::PerformInsertVectorEltInMemory(SDValue Vec, // supported by the target. EVT VT = Tmp1.getValueType(); EVT EltVT = VT.getVectorElementType(); - EVT IdxVT = Tmp3.getValueType(); - EVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); SDValue StackPtr = DAG.CreateStackTemporary(VT); int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); @@ -342,13 +340,8 @@ SDValue SelectionDAGLegalize::PerformInsertVectorEltInMemory(SDValue Vec, DAG.getEntryNode(), dl, Tmp1, StackPtr, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI)); - // Truncate or zero extend offset to target pointer type. - Tmp3 = DAG.getZExtOrTrunc(Tmp3, dl, PtrVT); - // Add the offset to the index. - unsigned EltSize = EltVT.getSizeInBits()/8; - Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3, - DAG.getConstant(EltSize, dl, IdxVT)); - SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr); + SDValue StackPtr2 = TLI.getVectorElementPointer(DAG, StackPtr, VT, Tmp3); + // Store the scalar value. Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT); // Load the updated vector.
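Both hunks above replace open-coded index arithmetic (zero-extend the index, multiply by the element size, add the stack pointer) with a call to TLI.getVectorElementPointer. In scalar terms the removed sequence computed the following; this is a sketch only, and the real helper in TargetLowering may do more, such as clamping a dynamic index to the vector bounds (an assumption here, not shown in this excerpt):

#include <cstdint>

// Address of element Idx in a vector spilled at StackPtr: scale the index by
// the element's store size and add it to the slot base.
char *vectorElementPointer(char *StackPtr, uint64_t Idx,
                           unsigned EltSizeInBytes) {
  return StackPtr + Idx * EltSizeInBytes;
}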
@@ -1211,20 +1204,16 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { } } + EVT VecVT = Vec.getValueType(); + if (!Ch.getNode()) { // Store the value to a temporary stack slot, then LOAD the returned part. - StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); + StackPtr = DAG.CreateStackTemporary(VecVT); Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo()); } - // Add the offset to the index. - unsigned EltSize = Vec.getScalarValueSizeInBits() / 8; - Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, - DAG.getConstant(EltSize, SDLoc(Vec), Idx.getValueType())); - - Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy(DAG.getDataLayout())); - StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr); + StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); SDValue NewLoad; @@ -1234,7 +1223,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { else NewLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, MachinePointerInfo(), - Vec.getValueType().getVectorElementType()); + VecVT.getVectorElementType()); // Replace the chain going out of the store, by the one out of the load. DAG.ReplaceAllUsesOfValueWith(Ch, SDValue(NewLoad.getNode(), 1)); @@ -1258,8 +1247,8 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { SDLoc dl(Op); // Store the value to a temporary stack slot, then LOAD the returned part. - - SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); + EVT VecVT = Vec.getValueType(); + SDValue StackPtr = DAG.CreateStackTemporary(VecVT); int FI = cast(StackPtr.getNode())->getIndex(); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); @@ -1268,16 +1257,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo); // Then store the inserted part. - - // Add the offset to the index. - unsigned EltSize = Vec.getScalarValueSizeInBits() / 8; - - Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, - DAG.getConstant(EltSize, SDLoc(Vec), Idx.getValueType())); - Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy(DAG.getDataLayout())); - - SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, - StackPtr); + SDValue SubStackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); // Store the subvector. Ch = DAG.getStore(Ch, dl, Part, SubStackPtr, MachinePointerInfo()); @@ -2554,12 +2534,12 @@ SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) { APInt MaskHi4(Sz, 0), MaskHi2(Sz, 0), MaskHi1(Sz, 0); APInt MaskLo4(Sz, 0), MaskLo2(Sz, 0), MaskLo1(Sz, 0); for (unsigned J = 0; J != Sz; J += 8) { - MaskHi4 = MaskHi4.Or(APInt(Sz, 0xF0ull << J)); - MaskLo4 = MaskLo4.Or(APInt(Sz, 0x0Full << J)); - MaskHi2 = MaskHi2.Or(APInt(Sz, 0xCCull << J)); - MaskLo2 = MaskLo2.Or(APInt(Sz, 0x33ull << J)); - MaskHi1 = MaskHi1.Or(APInt(Sz, 0xAAull << J)); - MaskLo1 = MaskLo1.Or(APInt(Sz, 0x55ull << J)); + MaskHi4 = MaskHi4 | (0xF0ull << J); + MaskLo4 = MaskLo4 | (0x0Full << J); + MaskHi2 = MaskHi2 | (0xCCull << J); + MaskLo2 = MaskLo2 | (0x33ull << J); + MaskHi1 = MaskHi1 | (0xAAull << J); + MaskLo1 = MaskLo1 | (0x55ull << J); } // BSWAP if the type is wider than a single byte. 
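The masks built above (0xF0/0x0F, 0xCC/0x33, 0xAA/0x55, repeated per byte) drive the classic swap-based expansion of BITREVERSE; the APInt change merely replaces the named Or calls with operator|. One byte's worth of the underlying idea, as a standalone sketch:

#include <cstdint>
#include <cstdio>

uint8_t reverseByte(uint8_t B) {
  B = (uint8_t)(((B & 0xF0) >> 4) | ((B & 0x0F) << 4)); // swap nibbles
  B = (uint8_t)(((B & 0xCC) >> 2) | ((B & 0x33) << 2)); // swap bit pairs
  B = (uint8_t)(((B & 0xAA) >> 1) | ((B & 0x55) << 1)); // swap adjacent bits
  return B;
}

int main() {
  std::printf("0x%02x\n", reverseByte(0x01)); // prints 0x80
  return 0;
}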
@@ -3116,7 +3096,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { TLI.getVectorIdxTy(DAG.getDataLayout())))); } - Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); + Tmp1 = DAG.getBuildVector(VT, dl, Ops); // We may have changed the BUILD_VECTOR type. Cast it back to the Node type. Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), Tmp1); Results.push_back(Tmp1); @@ -3817,7 +3797,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { VT.getScalarType(), Ex, Sh)); } SDValue Result = - DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Scalars); + DAG.getBuildVector(Node->getValueType(0), dl, Scalars); Results.push_back(Result); } break; @@ -4487,8 +4467,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { NewOps.push_back(Elt); } - SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, SL, MidVT, NewOps); - + SDValue NewVec = DAG.getBuildVector(MidVT, SL, NewOps); Results.push_back(DAG.getNode(ISD::BITCAST, SL, EltVT, NewVec)); break; } diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index b0bd31d..3715aee 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -57,8 +57,6 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break; case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break; case ISD::Constant: Res = PromoteIntRes_Constant(N); break; - case ISD::CONVERT_RNDSAT: - Res = PromoteIntRes_CONVERT_RNDSAT(N); break; case ISD::CTLZ_ZERO_UNDEF: case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break; case ISD::CTPOP: Res = PromoteIntRes_CTPOP(N); break; @@ -354,18 +352,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) { return Result; } -SDValue DAGTypeLegalizer::PromoteIntRes_CONVERT_RNDSAT(SDNode *N) { - ISD::CvtCode CvtCode = cast(N)->getCvtCode(); - assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU || - CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU || - CvtCode == ISD::CVT_SF || CvtCode == ISD::CVT_UF) && - "can only promote integers"); - EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - return DAG.getConvertRndSat(OutVT, SDLoc(N), N->getOperand(0), - N->getOperand(1), N->getOperand(2), - N->getOperand(3), N->getOperand(4), CvtCode); -} - SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { // Zero extend to the promoted type and do the count there. SDValue Op = ZExtPromotedInteger(N->getOperand(0)); @@ -428,7 +414,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { // Assert that the converted value fits in the original type. If it doesn't // (eg: because the value being converted is too big), then the result of the // original operation was undefined anyway, so the assert is still correct. - return DAG.getNode(NewOpc == ISD::FP_TO_UINT ? + // + // NOTE: fp-to-uint to fp-to-sint promotion guarantees zero extend. For example: + // before legalization: fp-to-uint16, 65534. -> 0xfffe + // after legalization: fp-to-sint32, 65534. -> 0x0000fffe + return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? 
ISD::AssertZext : ISD::AssertSext, dl, NVT, Res, DAG.getValueType(N->getValueType(0).getScalarType())); } @@ -508,7 +498,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) { N->getIndex()}; SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other), N->getMemoryVT(), dl, Ops, - N->getMemOperand()); + N->getMemOperand()); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -883,8 +873,6 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::BUILD_VECTOR: Res = PromoteIntOp_BUILD_VECTOR(N); break; case ISD::CONCAT_VECTORS: Res = PromoteIntOp_CONCAT_VECTORS(N); break; case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntOp_EXTRACT_VECTOR_ELT(N); break; - case ISD::CONVERT_RNDSAT: - Res = PromoteIntOp_CONVERT_RNDSAT(N); break; case ISD::INSERT_VECTOR_ELT: Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break; case ISD::SCALAR_TO_VECTOR: @@ -1064,18 +1052,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) { return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); } -SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_RNDSAT(SDNode *N) { - ISD::CvtCode CvtCode = cast(N)->getCvtCode(); - assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU || - CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU || - CvtCode == ISD::CVT_FS || CvtCode == ISD::CVT_FU) && - "can only promote integer arguments"); - SDValue InOp = GetPromotedInteger(N->getOperand(0)); - return DAG.getConvertRndSat(N->getValueType(0), SDLoc(N), InOp, - N->getOperand(1), N->getOperand(2), - N->getOperand(3), N->getOperand(4), CvtCode); -} - SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo) { if (OpNo == 1) { @@ -1710,7 +1686,7 @@ void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N, EVT CCT = getSetCCResultType(NVT); // Hi part is always the same op - Hi = DAG.getNode(N->getOpcode(), DL, {NVT, NVT}, {LHSH, RHSH}); + Hi = DAG.getNode(N->getOpcode(), DL, NVT, {LHSH, RHSH}); // We need to know whether to select Lo part that corresponds to 'winning' // Hi part or if Hi parts are equal. @@ -1721,7 +1697,7 @@ void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N, SDValue LoCmp = DAG.getSelect(DL, NVT, IsHiLeft, LHSL, RHSL); // Recursed Lo part if Hi parts are equal, this uses unsigned version - SDValue LoMinMax = DAG.getNode(LoOpc, DL, {NVT, NVT}, {LHSL, RHSL}); + SDValue LoMinMax = DAG.getNode(LoOpc, DL, NVT, {LHSL, RHSL}); Lo = DAG.getSelect(DL, NVT, IsHiEq, LoMinMax, LoCmp); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 8470342..5916956 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -117,6 +117,8 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { Mapped |= 64; if (WidenedVectors.find(Res) != WidenedVectors.end()) Mapped |= 128; + if (PromotedFloats.find(Res) != PromotedFloats.end()) + Mapped |= 256; if (Node.getNodeId() != Processed) { // Since we allow ReplacedValues to map deleted nodes, it may map nodes @@ -159,6 +161,8 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { dbgs() << " SplitVectors"; if (Mapped & 128) dbgs() << " WidenedVectors"; + if (Mapped & 256) + dbgs() << " PromotedFloats"; dbgs() << "\n"; llvm_unreachable(nullptr); } @@ -195,8 +199,7 @@ bool DAGTypeLegalizer::run() { // non-leaves. 
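The NOTE added to PromoteIntRes_FP_TO_XINT above is easy to reproduce in plain C++: the promoted signed conversion yields the same low 16 bits as the original unsigned one, with zero high bits, which is exactly what the AssertZext records (illustrative sketch, not part of the patch):

#include <cstdint>
#include <cstdio>

int main() {
  double V = 65534.0;
  // fp-to-uint16 gives 0xfffe; the promoted fp-to-sint32 gives 0x0000fffe,
  // same low 16 bits, zero-extended high bits.
  std::printf("0x%04x 0x%08x\n", (uint16_t)V, (uint32_t)(int32_t)V);
  return 0;
}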
for (SDNode &Node : DAG.allnodes()) { if (Node.getNumOperands() == 0) { - Node.setNodeId(ReadyToProcess); - Worklist.push_back(&Node); + AddToWorklist(&Node); } else { Node.setNodeId(Unanalyzed); } @@ -327,6 +330,12 @@ bool DAGTypeLegalizer::run() { // to the worklist etc. if (NeedsReanalyzing) { assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?"); + + // Remove any result values from SoftenedFloats as N will be revisited + // again. + for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) + SoftenedFloats.erase(SDValue(N, i)); + N->setNodeId(NewNode); // Recompute the NodeId and correct processed operands, adding the node to // the worklist if ready. @@ -745,6 +754,8 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) { // new uses of From due to CSE. If this happens, replace the new uses of // From with To. } while (!From.use_empty()); + + SoftenedFloats.erase(From); } void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { @@ -1017,22 +1028,6 @@ void DAGTypeLegalizer::GetPairElements(SDValue Pair, DAG.getIntPtrConstant(1, dl)); } -SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT, - SDValue Index) { - SDLoc dl(Index); - // Make sure the index type is big enough to compute in. - Index = DAG.getZExtOrTrunc(Index, dl, TLI.getPointerTy(DAG.getDataLayout())); - - // Calculate the element offset and add it to the pointer. - unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size. - assert(EltSize * 8 == EltVT.getSizeInBits() && - "Converting bits to bytes lost precision"); - - Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index, - DAG.getConstant(EltSize, dl, Index.getValueType())); - return DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, VecPtr); -} - /// Build an integer with low bits Lo and high bits Hi. SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { // Arbitrarily use dlHi for result SDLoc diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index d1022af..9d8aaee 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -173,7 +173,6 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { /// input operand is returned. 
SDValue DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo); - SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index); SDValue JoinIntegers(SDValue Lo, SDValue Hi); SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned); @@ -192,6 +191,11 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT, SDValue &Lo, SDValue &Hi); + void AddToWorklist(SDNode *N) { + N->setNodeId(ReadyToProcess); + Worklist.push_back(N); + } + //===--------------------------------------------------------------------===// // Integer Promotion Support: LegalizeIntegerTypes.cpp //===--------------------------------------------------------------------===// @@ -250,7 +254,6 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue PromoteIntRes_BITREVERSE(SDNode *N); SDValue PromoteIntRes_BUILD_PAIR(SDNode *N); SDValue PromoteIntRes_Constant(SDNode *N); - SDValue PromoteIntRes_CONVERT_RNDSAT(SDNode *N); SDValue PromoteIntRes_CTLZ(SDNode *N); SDValue PromoteIntRes_CTPOP(SDNode *N); SDValue PromoteIntRes_CTTZ(SDNode *N); @@ -289,7 +292,6 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N); - SDValue PromoteIntOp_CONVERT_RNDSAT(SDNode *N); SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N); @@ -600,10 +602,10 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue ScalarizeVecRes_TernaryOp(SDNode *N); SDValue ScalarizeVecRes_UnaryOp(SDNode *N); SDValue ScalarizeVecRes_InregOp(SDNode *N); + SDValue ScalarizeVecRes_VecInregOp(SDNode *N); SDValue ScalarizeVecRes_BITCAST(SDNode *N); SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N); - SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N); SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N); SDValue ScalarizeVecRes_FP_ROUND(SDNode *N); SDValue ScalarizeVecRes_FPOWI(SDNode *N); @@ -709,7 +711,6 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue WidenVecRes_BITCAST(SDNode* N); SDValue WidenVecRes_BUILD_VECTOR(SDNode* N); SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N); - SDValue WidenVecRes_CONVERT_RNDSAT(SDNode* N); SDValue WidenVecRes_EXTEND_VECTOR_INREG(SDNode* N); SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N); SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 57c179a..358d426 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -51,7 +51,6 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::MERGE_VALUES: R = ScalarizeVecRes_MERGE_VALUES(N, ResNo);break; case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break; case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break; - case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break; case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break; case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break; @@ -66,6 +65,11 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break; case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break; case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); 
break; + case ISD::ANY_EXTEND_VECTOR_INREG: + case ISD::SIGN_EXTEND_VECTOR_INREG: + case ISD::ZERO_EXTEND_VECTOR_INREG: + R = ScalarizeVecRes_VecInregOp(N); + break; case ISD::ANY_EXTEND: case ISD::BITREVERSE: case ISD::BSWAP: @@ -98,6 +102,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::TRUNCATE: case ISD::UINT_TO_FP: case ISD::ZERO_EXTEND: + case ISD::FCANONICALIZE: R = ScalarizeVecRes_UnaryOp(N); break; @@ -179,17 +184,6 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) { return InOp; } -SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) { - EVT NewVT = N->getValueType(0).getVectorElementType(); - SDValue Op0 = GetScalarizedVector(N->getOperand(0)); - return DAG.getConvertRndSat(NewVT, SDLoc(N), - Op0, DAG.getValueType(NewVT), - DAG.getValueType(Op0.getValueType()), - N->getOperand(3), - N->getOperand(4), - cast(N)->getCvtCode()); -} - SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) { return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), N->getValueType(0).getVectorElementType(), @@ -269,6 +263,34 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) { LHS, DAG.getValueType(ExtVT)); } +SDValue DAGTypeLegalizer::ScalarizeVecRes_VecInregOp(SDNode *N) { + SDLoc DL(N); + SDValue Op = N->getOperand(0); + + EVT OpVT = Op.getValueType(); + EVT OpEltVT = OpVT.getVectorElementType(); + EVT EltVT = N->getValueType(0).getVectorElementType(); + + if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) { + Op = GetScalarizedVector(Op); + } else { + Op = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, OpEltVT, Op, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } + + switch (N->getOpcode()) { + case ISD::ANY_EXTEND_VECTOR_INREG: + return DAG.getNode(ISD::ANY_EXTEND, DL, EltVT, Op); + case ISD::SIGN_EXTEND_VECTOR_INREG: + return DAG.getNode(ISD::SIGN_EXTEND, DL, EltVT, Op); + case ISD::ZERO_EXTEND_VECTOR_INREG: + return DAG.getNode(ISD::ZERO_EXTEND, DL, EltVT, Op); + } + + llvm_unreachable("Illegal extend_vector_inreg opcode"); +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) { // If the operand is wider than the vector element type then it is implicitly // truncated. Make that explicit here. 
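ScalarizeVecRes_VecInregOp above lowers a one-element *_EXTEND_VECTOR_INREG to a scalar extend of element 0 of the input. The scalar semantics, sketched with plain integer types (hypothetical helper names):

#include <cstdint>

// v1i32 sign_extend_vector_inreg(<a, b>: v2i16) keeps only element 0 and
// sign-extends it; the zero-extend form differs only in the source type.
int32_t scalarizedSignExtendInReg(int16_t Elt0) {
  return (int32_t)Elt0; // implicit sign extension
}

uint32_t scalarizedZeroExtendInReg(uint16_t Elt0) {
  return (uint32_t)Elt0; // implicit zero extension
}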
@@ -498,7 +520,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { SmallVector Ops(N->getNumOperands()); for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) Ops[i] = GetScalarizedVector(N->getOperand(i)); - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), Ops); + return DAG.getBuildVector(N->getValueType(0), SDLoc(N), Ops); } /// If the input is a vector that needs to be scalarized, it must be <1 x ty>, @@ -621,7 +643,6 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::BITREVERSE: case ISD::BSWAP: - case ISD::CONVERT_RNDSAT: case ISD::CTLZ: case ISD::CTTZ: case ISD::CTLZ_ZERO_UNDEF: @@ -650,6 +671,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::SINT_TO_FP: case ISD::TRUNCATE: case ISD::UINT_TO_FP: + case ISD::FCANONICALIZE: SplitVecRes_UnaryOp(N, Lo, Hi); break; @@ -794,10 +816,10 @@ void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); unsigned LoNumElts = LoVT.getVectorNumElements(); SmallVector LoOps(N->op_begin(), N->op_begin()+LoNumElts); - Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, LoOps); + Lo = DAG.getBuildVector(LoVT, dl, LoOps); SmallVector HiOps(N->op_begin()+LoNumElts, N->op_end()); - Hi = DAG.getNode(ISD::BUILD_VECTOR, dl, HiVT, HiOps); + Hi = DAG.getBuildVector(HiVT, dl, HiOps); } void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, @@ -846,7 +868,6 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, GetSplitVector(Vec, Lo, Hi); EVT VecVT = Vec.getValueType(); - EVT VecElemVT = VecVT.getVectorElementType(); unsigned VecElems = VecVT.getVectorNumElements(); unsigned SubElems = SubVec.getValueType().getVectorNumElements(); @@ -872,7 +893,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo()); // Store the new subvector into the specified index. - SDValue SubVecPtr = GetVectorElementPointer(StackPtr, VecElemVT, Idx); + SDValue SubVecPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType); Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo()); @@ -942,7 +963,12 @@ void DAGTypeLegalizer::SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, SDLoc dl(N); SDValue InLo, InHi; - GetSplitVector(N0, InLo, InHi); + + if (getTypeAction(N0.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(N0, InLo, InHi); + else + std::tie(InLo, InHi) = DAG.SplitVectorOperand(N, 0); + EVT InLoVT = InLo.getValueType(); unsigned InNumElements = InLoVT.getVectorNumElements(); @@ -1003,7 +1029,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Store the new element. This may be larger than the vector element type, // so use a truncating store. 
- SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); + SDValue EltPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType); Store = @@ -1236,18 +1262,6 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, if (N->getOpcode() == ISD::FP_ROUND) { Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1)); Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1)); - } else if (N->getOpcode() == ISD::CONVERT_RNDSAT) { - SDValue DTyOpLo = DAG.getValueType(LoVT); - SDValue DTyOpHi = DAG.getValueType(HiVT); - SDValue STyOpLo = DAG.getValueType(Lo.getValueType()); - SDValue STyOpHi = DAG.getValueType(Hi.getValueType()); - SDValue RndOp = N->getOperand(3); - SDValue SatOp = N->getOperand(4); - ISD::CvtCode CvtCode = cast(N)->getCvtCode(); - Lo = DAG.getConvertRndSat(LoVT, dl, Lo, DTyOpLo, STyOpLo, RndOp, SatOp, - CvtCode); - Hi = DAG.getConvertRndSat(HiVT, dl, Hi, DTyOpHi, STyOpHi, RndOp, SatOp, - CvtCode); } else { Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); @@ -1398,7 +1412,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, } // Construct the Lo/Hi output using a BUILD_VECTOR. - Output = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, SVOps); + Output = DAG.getBuildVector(NewVT, dl, SVOps); } else if (InputUsed[0] == -1U) { // No input vectors were used! The result is undefined. Output = DAG.getUNDEF(NewVT); @@ -1492,6 +1506,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: case ISD::FTRUNC: + case ISD::FCANONICALIZE: Res = SplitVecOp_UnaryOp(N); break; } @@ -1641,7 +1656,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { EltVT = MVT::i8; VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, VecVT.getVectorNumElements()); - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, ElementOps); + Vec = DAG.getBuildVector(VecVT, dl, ElementOps); } // Store the vector to the stack. @@ -1650,7 +1665,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo()); // Load back the required element. 
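This hunk and ExpandExtractFromVectorThroughStack earlier share one expansion strategy for dynamic-index element access: spill the vector to a stack temporary, then treat it as indexed memory. The C-level shape of that expansion, as a sketch:

#include <cstring>

// Extract element Idx of a 4 x float vector with a non-constant index:
// store the whole vector to a slot, then load the one element back.
float extractElementViaStack(const float *Vec4, unsigned Idx) {
  float Slot[4];
  std::memcpy(Slot, Vec4, sizeof(Slot)); // the vector store
  return Slot[Idx];                      // the narrow reload
}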
- StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); + StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, MachinePointerInfo(), EltVT); } @@ -1907,7 +1922,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { } } - return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0), Elts); + return DAG.getBuildVector(N->getValueType(0), DL, Elts); } SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) { @@ -2045,7 +2060,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break; case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break; case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break; - case ISD::CONVERT_RNDSAT: Res = WidenVecRes_CONVERT_RNDSAT(N); break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::FP_ROUND_INREG: Res = WidenVecRes_InregOp(N); break; case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; @@ -2350,6 +2364,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { return DAG.getNode(Opcode, DL, WidenVT, InOp); return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags); } + if (WidenVT.getSizeInBits() == InVT.getSizeInBits()) { + // If both input and result vector types are of same width, extend + // operations should be done with SIGN/ZERO_EXTEND_VECTOR_INREG, which + // accepts fewer elements in the result than in the input. + if (Opcode == ISD::SIGN_EXTEND) + return DAG.getSignExtendVectorInReg(InOp, DL, WidenVT); + if (Opcode == ISD::ZERO_EXTEND) + return DAG.getZeroExtendVectorInReg(InOp, DL, WidenVT); + } } if (TLI.isTypeLegal(InWidenVT)) { @@ -2402,7 +2425,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { for (; i < WidenNumElts; ++i) Ops[i] = UndefVal; - return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, Ops); + return DAG.getBuildVector(WidenVT, DL, Ops); } SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) { @@ -2457,7 +2480,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) { while (Ops.size() != WidenNumElts) Ops.push_back(DAG.getUNDEF(WidenSVT)); - return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, Ops); + return DAG.getBuildVector(WidenVT, DL, Ops); } SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) { @@ -2620,7 +2643,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) { assert(WidenNumElts >= NumElts && "Shrinking vector instead of widening!"); NewOps.append(WidenNumElts - NumElts, DAG.getUNDEF(EltVT)); - return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, NewOps); + return DAG.getBuildVector(WidenVT, dl, NewOps); } SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { @@ -2690,87 +2713,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { SDValue UndefVal = DAG.getUNDEF(EltVT); for (; Idx < WidenNumElts; ++Idx) Ops[Idx] = UndefVal; - return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops); -} - -SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { - SDLoc dl(N); - SDValue InOp = N->getOperand(0); - SDValue RndOp = N->getOperand(3); - SDValue SatOp = N->getOperand(4); - - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - unsigned WidenNumElts = WidenVT.getVectorNumElements(); - - EVT InVT = InOp.getValueType(); - EVT InEltVT = InVT.getVectorElementType(); - EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, 
WidenNumElts); - - SDValue DTyOp = DAG.getValueType(WidenVT); - SDValue STyOp = DAG.getValueType(InWidenVT); - ISD::CvtCode CvtCode = cast(N)->getCvtCode(); - - unsigned InVTNumElts = InVT.getVectorNumElements(); - if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { - InOp = GetWidenedVector(InOp); - InVT = InOp.getValueType(); - InVTNumElts = InVT.getVectorNumElements(); - if (InVTNumElts == WidenNumElts) - return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, - SatOp, CvtCode); - } - - if (TLI.isTypeLegal(InWidenVT)) { - // Because the result and the input are different vector types, widening - // the result could create a legal type but widening the input might make - // it an illegal type that might lead to repeatedly splitting the input - // and then widening it. To avoid this, we widen the input only if - // it results in a legal type. - if (WidenNumElts % InVTNumElts == 0) { - // Widen the input and call convert on the widened input vector. - unsigned NumConcat = WidenNumElts/InVTNumElts; - SmallVector Ops(NumConcat); - Ops[0] = InOp; - SDValue UndefVal = DAG.getUNDEF(InVT); - for (unsigned i = 1; i != NumConcat; ++i) - Ops[i] = UndefVal; - - InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, Ops); - return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, - SatOp, CvtCode); - } - - if (InVTNumElts % WidenNumElts == 0) { - // Extract the input and convert the shorten input vector. - InOp = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp, - DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, - SatOp, CvtCode); - } - } - - // Otherwise unroll into some nasty scalar code and rebuild the vector. - SmallVector Ops(WidenNumElts); - EVT EltVT = WidenVT.getVectorElementType(); - DTyOp = DAG.getValueType(EltVT); - STyOp = DAG.getValueType(InEltVT); - - unsigned MinElts = std::min(InVTNumElts, WidenNumElts); - unsigned i; - for (i=0; i < MinElts; ++i) { - SDValue ExtVal = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, - DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp, - SatOp, CvtCode); - } - - SDValue UndefVal = DAG.getUNDEF(EltVT); - for (; i < WidenNumElts; ++i) - Ops[i] = UndefVal; - - return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops); + return DAG.getBuildVector(WidenVT, dl, Ops); } SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { @@ -2811,7 +2754,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { SDValue UndefVal = DAG.getUNDEF(EltVT); for (; i < WidenNumElts; ++i) Ops[i] = UndefVal; - return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops); + return DAG.getBuildVector(WidenVT, dl, Ops); } SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { @@ -3200,7 +3143,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())))); - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); + return DAG.getBuildVector(VT, dl, Ops); } SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { @@ -3251,7 +3194,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); } - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); + return DAG.getBuildVector(VT, dl, Ops); } SDValue 
DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) { @@ -3546,7 +3489,10 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl &LdChain, LD->getPointerInfo().getWithOffset(Offset), MinAlign(Align, Increment), MMOFlags, AAInfo); LdChain.push_back(L.getValue(1)); - if (L->getValueType(0).isVector()) { + if (L->getValueType(0).isVector() && NewVTWidth >= LdWidth) { + // Later code assumes the vector loads produced will be mergeable, so we + // must pad the final entry up to the previous width. Scalars are + // combined separately. SmallVector Loads; Loads.push_back(L); unsigned size = L->getValueSizeInBits(0); @@ -3669,10 +3615,9 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl &LdChain, for (; i != WidenNumElts; ++i) Ops[i] = UndefVal; - return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops); + return DAG.getBuildVector(WidenVT, dl, Ops); } - void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl &StChain, StoreSDNode *ST) { // The strategy assumes that we can efficiently store power-of-two widths. @@ -3841,5 +3786,5 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT, DAG.getUNDEF(EltVT); for ( ; Idx < WidenNumElts; ++Idx) Ops[Idx] = FillVal; - return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Ops); + return DAG.getBuildVector(NVT, dl, Ops); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index fd254d0..58ff3bc 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1104,7 +1104,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, if (VT.isVector() && TLI->getTypeAction(*getContext(), EltVT) == TargetLowering::TypePromoteInteger) { EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT); - APInt NewVal = Elt->getValue().zext(EltVT.getSizeInBits()); + APInt NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits()); Elt = ConstantInt::get(*getContext(), NewVal); } // In other cases the element type is illegal and needs to be expanded, for @@ -1130,7 +1130,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, SmallVector EltParts; for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) { EltParts.push_back(getConstant(NewVal.lshr(i * ViaEltSizeInBits) - .trunc(ViaEltSizeInBits), DL, + .zextOrTrunc(ViaEltSizeInBits), DL, ViaEltVT, isT, isO)); } @@ -1629,31 +1629,6 @@ SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) { return getVectorShuffle(VT, SDLoc(&SV), Op1, Op0, MaskVec); } -SDValue SelectionDAG::getConvertRndSat(EVT VT, const SDLoc &dl, SDValue Val, - SDValue DTy, SDValue STy, SDValue Rnd, - SDValue Sat, ISD::CvtCode Code) { - // If the src and dest types are the same and the conversion is between - // integer types of the same sign or two floats, no conversion is necessary. 
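The WidenVecRes_Convert hunk above routes same-width extends through SIGN/ZERO_EXTEND_VECTOR_INREG, which produce fewer, wider result elements from the low elements of the input. A rough scalar model of what those nodes compute (the element types here are illustrative, not the DAG node API):

#include <cassert>
#include <cstdint>
#include <vector>

// Models sign_extend_vector_inreg from v8i16 to v4i32: same 128-bit total
// width, half as many elements, each taken from a low input element.
std::vector<int32_t> signExtendVectorInReg(const std::vector<int16_t> &In,
                                           unsigned ResultElts) {
  assert(ResultElts <= In.size() && "result uses only the low input elements");
  std::vector<int32_t> Out(ResultElts);
  for (unsigned i = 0; i != ResultElts; ++i)
    Out[i] = static_cast<int32_t>(In[i]); // per-element sign extension
  return Out;
}

int main() {
  std::vector<int16_t> In = {-1, 2, -3, 4, 7, 7, 7, 7};
  std::vector<int32_t> Out = signExtendVectorInReg(In, 4);
  assert(Out.size() == 4 && Out[0] == -1 && Out[2] == -3);
}

The zero-extend variant is identical except each element is filled with zeros, which is why the legalizer can choose the node purely from the opcode.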
- if (DTy == STy && - (Code == ISD::CVT_UU || Code == ISD::CVT_SS || Code == ISD::CVT_FF)) - return Val; - - FoldingSetNodeID ID; - SDValue Ops[] = { Val, DTy, STy, Rnd, Sat }; - AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), Ops); - void* IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) - return SDValue(E, 0); - - auto *N = - newSDNode(VT, dl.getIROrder(), dl.getDebugLoc(), Code); - createOperands(N, Ops); - - CSEMap.InsertNode(N, IP); - InsertNode(N); - return SDValue(N, 0); -} - SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::Register, getVTList(VT), None); @@ -2087,7 +2062,10 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, if (M < 0) { // For UNDEF elements, we don't know anything about the common state of // the shuffle result. - KnownZero = KnownOne = APInt(BitWidth, 0); + KnownOne.clearAllBits(); + KnownZero.clearAllBits(); + DemandedLHS.clearAllBits(); + DemandedRHS.clearAllBits(); break; } @@ -2103,6 +2081,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, KnownOne &= KnownOne2; KnownZero &= KnownZero2; } + // If we don't know any bits, early out. + if (!KnownOne && !KnownZero) + break; if (!!DemandedRHS) { SDValue RHS = Op.getOperand(1); computeKnownBits(RHS, KnownZero2, KnownOne2, DemandedRHS, Depth + 1); @@ -2126,6 +2107,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, KnownOne &= KnownOne2; KnownZero &= KnownZero2; } + // If we don't know any bits, early out. + if (!KnownOne && !KnownZero) + break; } break; } @@ -2207,6 +2191,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, unsigned Offset = (i % SubScale) * BitWidth; KnownOne &= KnownOne2.lshr(Offset).trunc(BitWidth); KnownZero &= KnownZero2.lshr(Offset).trunc(BitWidth); + // If we don't know any bits, early out. + if (!KnownOne && !KnownZero) + break; } } break; @@ -2290,6 +2277,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, } case ISD::SELECT: computeKnownBits(Op.getOperand(2), KnownZero, KnownOne, Depth+1); + // If we don't know any bits, early out. + if (!KnownOne && !KnownZero) + break; computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); // Only known if known in both the LHS and RHS. @@ -2298,6 +2288,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, break; case ISD::SELECT_CC: computeKnownBits(Op.getOperand(3), KnownZero, KnownOne, Depth+1); + // If we don't know any bits, early out. + if (!KnownOne && !KnownZero) + break; computeKnownBits(Op.getOperand(2), KnownZero2, KnownOne2, Depth+1); // Only known if known in both the LHS and RHS. 
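The computeKnownBits changes above all share one merge rule, and the new early-outs follow from it: bits known in a merged result are exactly those known, with matching values, in every contributing operand, so once both masks are empty nothing further can be learned. A minimal standalone sketch of that rule (toy 8-bit masks, not the APInt API):

#include <cassert>
#include <cstdint>

struct Known { uint8_t Zero, One; }; // bit set => that bit of the value is known

Known mergeKnown(Known A, Known B) {
  // Keep only bits known to the same value on both sides.
  return Known{uint8_t(A.Zero & B.Zero), uint8_t(A.One & B.One)};
}

int main() {
  Known A = {0xF0, 0x0F}; // value known to be exactly 0x0F
  Known B = {0xF0, 0x03}; // high nibble zero, low two bits one
  Known M = mergeKnown(A, B);
  assert(M.Zero == 0xF0 && M.One == 0x03);
  // Merging with a fully-unknown operand empties the masks, and every further
  // merge stays empty -- hence the "early out" checks added in the patch.
  Known M2 = mergeKnown(M, Known{0, 0});
  assert(M2.Zero == 0 && M2.One == 0);
}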
@@ -2426,6 +2419,20 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, } break; } + case ISD::ZERO_EXTEND_VECTOR_INREG: { + EVT InVT = Op.getOperand(0).getValueType(); + unsigned InBits = InVT.getScalarSizeInBits(); + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits); + KnownZero = KnownZero.trunc(InBits); + KnownOne = KnownOne.trunc(InBits); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, + DemandedElts.zext(InVT.getVectorNumElements()), + Depth + 1); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); + KnownZero |= NewBits; + break; + } case ISD::ZERO_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarSizeInBits(); @@ -2439,6 +2446,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, KnownZero |= NewBits; break; } + // TODO ISD::SIGN_EXTEND_VECTOR_INREG case ISD::SIGN_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarSizeInBits(); @@ -2513,6 +2521,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, LLVM_FALLTHROUGH; } case ISD::ADD: + case ISD::ADDC: case ISD::ADDE: { // Output known-0 bits are known if clear or set in both the low clear bits // common to both LHS & RHS. For example, 8+(X<<3) is known to have the @@ -2533,7 +2542,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, KnownZeroLow = std::min(KnownZeroLow, KnownZero2.countTrailingOnes()); - if (Opcode == ISD::ADD) { + if (Opcode == ISD::ADD || Opcode == ISD::ADDC) { KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroLow); if (KnownZeroHigh > 1) KnownZero |= APInt::getHighBitsSet(BitWidth, KnownZeroHigh - 1); @@ -2680,6 +2689,13 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, } break; } + case ISD::BITREVERSE: { + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); + KnownZero = KnownZero2.reverseBits(); + KnownOne = KnownOne2.reverseBits(); + break; + } case ISD::BSWAP: { computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, Depth + 1); @@ -2687,19 +2703,49 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, KnownOne = KnownOne2.byteSwap(); break; } - case ISD::SMIN: - case ISD::SMAX: - case ISD::UMIN: + case ISD::UMIN: { + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, + Depth + 1); + computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); + + // UMIN - we know that the result will have the maximum of the + // known zero leading bits of the inputs. + unsigned LeadZero = KnownZero.countLeadingOnes(); + LeadZero = std::max(LeadZero, KnownZero2.countLeadingOnes()); + + KnownZero &= KnownZero2; + KnownOne &= KnownOne2; + KnownZero |= APInt::getHighBitsSet(BitWidth, LeadZero); + break; + } case ISD::UMAX: { - APInt Op0Zero, Op0One; - APInt Op1Zero, Op1One; - computeKnownBits(Op.getOperand(0), Op0Zero, Op0One, DemandedElts, + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, Depth + 1); - computeKnownBits(Op.getOperand(1), Op1Zero, Op1One, DemandedElts, + computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, Depth + 1); - KnownZero = Op0Zero & Op1Zero; - KnownOne = Op0One & Op1One; + // UMAX - we know that the result will have the maximum of the + // known one leading bits of the inputs. 
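The new ZERO_EXTEND_VECTOR_INREG case above follows the same rule as scalar ZERO_EXTEND: every bit above the source width becomes known zero. A standalone sketch of the mask arithmetic (plain 32-bit integers standing in for APInt; InBits must stay below 32 here):

#include <cassert>
#include <cstdint>

void knownBitsAfterZext(uint32_t SrcZero, uint32_t SrcOne, unsigned InBits,
                        uint32_t &KnownZero, uint32_t &KnownOne) {
  uint32_t NewBits = ~0u << InBits; // the bits the extension introduces
  KnownZero = SrcZero | NewBits;    // zext fills them with zeros
  KnownOne = SrcOne & ~NewBits;     // so none of them can be one
}

int main() {
  uint32_t KZ, KO;
  // A 16-bit source whose top byte is known zero, low byte unknown.
  knownBitsAfterZext(0xFF00u, 0x0000u, 16, KZ, KO);
  assert(KZ == 0xFFFFFF00u && KO == 0u);
}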
+ unsigned LeadOne = KnownOne.countLeadingOnes(); + LeadOne = std::max(LeadOne, KnownOne2.countLeadingOnes()); + + KnownZero &= KnownZero2; + KnownOne &= KnownOne2; + KnownOne |= APInt::getHighBitsSet(BitWidth, LeadOne); + break; + } + case ISD::SMIN: + case ISD::SMAX: { + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, + Depth + 1); + // If we don't know any bits, early out. + if (!KnownOne && !KnownZero) + break; + computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); + KnownZero &= KnownZero2; + KnownOne &= KnownOne2; break; } case ISD::FrameIndex: @@ -2726,6 +2772,40 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); } +SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0, + SDValue N1) const { + // X + 0 never overflows. + if (isNullConstant(N1)) + return OFK_Never; + + APInt N1Zero, N1One; + computeKnownBits(N1, N1Zero, N1One); + if (N1Zero.getBoolValue()) { + APInt N0Zero, N0One; + computeKnownBits(N0, N0Zero, N0One); + + bool overflow; + (~N0Zero).uadd_ov(~N1Zero, overflow); + if (!overflow) + return OFK_Never; + } + + // mulhi + 1 never overflows. + if (N0.getOpcode() == ISD::UMUL_LOHI && N0.getResNo() == 1 && + (~N1Zero & 0x01) == ~N1Zero) + return OFK_Never; + + if (N1.getOpcode() == ISD::UMUL_LOHI && N1.getResNo() == 1) { + APInt N0Zero, N0One; + computeKnownBits(N0, N0Zero, N0One); + + if ((~N0Zero & 0x01) == ~N0Zero) + return OFK_Never; + } + + return OFK_Sometime; +} + bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const { EVT OpVT = Val.getValueType(); unsigned BitWidth = OpVT.getScalarSizeInBits(); @@ -2752,7 +2832,7 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const { // Are all operands of a build vector constant powers of two? if (Val.getOpcode() == ISD::BUILD_VECTOR) - if (llvm::all_of(Val->ops(), [this, BitWidth](SDValue E) { + if (llvm::all_of(Val->ops(), [BitWidth](SDValue E) { if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(E)) return C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2(); return false; @@ -2794,6 +2874,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { } case ISD::SIGN_EXTEND: + case ISD::SIGN_EXTEND_VECTOR_INREG: Tmp = VTBits - Op.getOperand(0).getScalarValueSizeInBits(); return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp; @@ -2894,6 +2975,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { } break; case ISD::ADD: + case ISD::ADDC: // Add can have at most one carry bit. Thus we know that the output // is, at worst, one more bit than the inputs. Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); @@ -3061,6 +3143,9 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op) const { if (getTarget().Options.NoNaNsFPMath) return true; + if (const BinaryWithFlagsSDNode *BF = dyn_cast<BinaryWithFlagsSDNode>(Op)) + return BF->Flags.hasNoNaNs(); + // If the value is a constant, we can obviously see if it is a NaN or not.
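The UMIN/UMAX cases above add a bound beyond the plain intersection: an unsigned minimum has at least as many known leading zeros as either input, and an unsigned maximum keeps the known leading ones. A brute-force check of both claims over 8-bit values (a verification sketch, not the APInt-based code):

#include <algorithm>
#include <cassert>

int main() {
  // If b < 64 it has at least two leading zero bits, and so does min(a, b).
  for (unsigned a = 0; a != 256; ++a)
    for (unsigned b = 0; b != 64; ++b)
      assert(std::min(a, b) < 64);
  // If a >= 192 its top two bits are ones, and so are max(a, b)'s.
  for (unsigned a = 192; a != 256; ++a)
    for (unsigned b = 0; b != 256; ++b)
      assert(std::max(a, b) >= 192);
}

The new computeOverflowKind helper in the same hunk uses the dual trick: it adds the largest values the operands could take (the complements of their known-zero masks), and if even that sum cannot carry out, the addition never overflows.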
if (const ConstantFPSDNode *C = dyn_cast(Op)) return !C->getValueAPF().isNaN(); @@ -3213,6 +3298,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (VT == MVT::f128 && C->getValueType(0) == MVT::i128) return getConstantFP(APFloat(APFloat::IEEEquad(), Val), DL, VT); break; + case ISD::BITREVERSE: + return getConstant(Val.reverseBits(), DL, VT, C->isTargetOpcode(), + C->isOpaque()); case ISD::BSWAP: return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(), C->isOpaque()); @@ -3227,6 +3315,17 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::CTTZ_ZERO_UNDEF: return getConstant(Val.countTrailingZeros(), DL, VT, C->isTargetOpcode(), C->isOpaque()); + case ISD::FP16_TO_FP: { + bool Ignored; + APFloat FPV(APFloat::IEEEhalf(), + (Val.getBitWidth() == 16) ? Val : Val.trunc(16)); + + // This can return overflow, underflow, or inexact; we don't care. + // FIXME need to be more flexible about rounding mode. + (void)FPV.convert(EVTToAPFloatSemantics(VT), + APFloat::rmNearestTiesToEven, &Ignored); + return getConstantFP(FPV, DL, VT); + } } } @@ -3288,6 +3387,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT); break; + case ISD::FP_TO_FP16: { + bool Ignored; + // This can return overflow, underflow, or inexact; we don't care. + // FIXME need to be more flexible about rounding mode. + (void)V.convert(APFloat::IEEEhalf(), + APFloat::rmNearestTiesToEven, &Ignored); + return getConstant(V.bitcastToAPInt(), DL, VT); + } } } @@ -3310,6 +3417,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::TRUNCATE: case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: + case ISD::BITREVERSE: case ISD::BSWAP: case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: @@ -3549,6 +3657,12 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, if (Cst1->isOpaque() || Cst2->isOpaque()) return SDValue(); + // Division/remainder with a zero divisor is undefined behavior. + if ((Opcode == ISD::SDIV || Opcode == ISD::UDIV || + Opcode == ISD::SREM || Opcode == ISD::UREM) && + Cst2->isNullValue()) + return getUNDEF(VT); + std::pair Folded = FoldValue(Opcode, Cst1->getAPIntValue(), Cst2->getAPIntValue()); if (!Folded.second) @@ -3683,7 +3797,7 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, // Find legal integer scalar type for constant promotion and // ensure that its scalar size is at least as large as source. EVT LegalSVT = VT.getScalarType(); - if (LegalSVT.isInteger()) { + if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()) { LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT); if (LegalSVT.bitsLT(VT.getScalarType())) return SDValue(); @@ -4047,6 +4161,19 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (VT.getSimpleVT() == N1.getSimpleValueType()) return N1; + // EXTRACT_SUBVECTOR of an UNDEF is an UNDEF. + if (N1.isUndef()) + return getUNDEF(VT); + + // EXTRACT_SUBVECTOR of CONCAT_VECTOR can be simplified if the pieces of + // the concat have the same type as the extract. + if (N2C && N1.getOpcode() == ISD::CONCAT_VECTORS && + N1.getNumOperands() > 0 && + VT == N1.getOperand(0).getValueType()) { + unsigned Factor = VT.getVectorNumElements(); + return N1.getOperand(N2C->getZExtValue() / Factor); + } + // EXTRACT_SUBVECTOR of INSERT_SUBVECTOR is often created // during shuffle legalization. 
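Several of the new constant folds above are pure bit manipulation; BITREVERSE is the simplest. A freestanding 8-bit version of the reversal that APInt::reverseBits performs (illustrative only):

#include <cassert>
#include <cstdint>

uint8_t reverseBits8(uint8_t V) {
  uint8_t R = 0;
  for (int i = 0; i != 8; ++i) {
    R = uint8_t((R << 1) | (V & 1)); // append the next low bit of V
    V >>= 1;
  }
  return R;
}

int main() {
  assert(reverseBits8(0x01) == 0x80);
  assert(reverseBits8(0xB0) == 0x0D); // 10110000 -> 00001101
}

The companion FoldConstantArithmetic change in the same region deliberately does not fold sdiv/udiv/srem/urem by a constant zero, returning undef instead, since such a division is undefined behavior in the IR.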
if (N1.getOpcode() == ISD::INSERT_SUBVECTOR && N2 == N1.getOperand(2) && @@ -6159,14 +6286,14 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, return New; } -/// UpdadeSDLocOnMergedSDNode - If the opt level is -O0 then it throws away +/// UpdateSDLocOnMergeSDNode - If the opt level is -O0 then it throws away /// the line number information on the merged node since it is not possible to /// preserve the information that operation is associated with multiple lines. /// This will make the debugger working better at -O0, were there is a higher /// probability having other instructions associated with that line. /// /// For IROrder, we keep the smaller of the two -SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, const SDLoc &OLoc) { +SDNode *SelectionDAG::UpdateSDLocOnMergeSDNode(SDNode *N, const SDLoc &OLoc) { DebugLoc NLoc = N->getDebugLoc(); if (NLoc && OptLevel == CodeGenOpt::None && OLoc.getDebugLoc() != NLoc) { N->setDebugLoc(DebugLoc()); @@ -6200,7 +6327,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, VTs, Ops); if (SDNode *ON = FindNodeOrInsertPos(ID, SDLoc(N), IP)) - return UpdadeSDLocOnMergedSDNode(ON, SDLoc(N)); + return UpdateSDLocOnMergeSDNode(ON, SDLoc(N)); } if (!RemoveNodeFromCSEMaps(N)) @@ -6352,7 +6479,7 @@ MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &DL, AddNodeIDNode(ID, ~Opcode, VTs, Ops); IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { - return cast(UpdadeSDLocOnMergedSDNode(E, DL)); + return cast(UpdateSDLocOnMergeSDNode(E, DL)); } } @@ -7056,6 +7183,21 @@ bool SDNode::isOnlyUserOf(const SDNode *N) const { return Seen; } +/// Return true if the only users of N are contained in Nodes. +bool SDNode::areOnlyUsersOf(ArrayRef Nodes, const SDNode *N) { + bool Seen = false; + for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { + SDNode *User = *I; + if (llvm::any_of(Nodes, + [&User](const SDNode *Node) { return User == Node; })) + Seen = true; + else + return false; + } + + return Seen; +} + /// isOperand - Return true if this node is an operand of N. /// bool SDValue::isOperandOf(const SDNode *N) const { @@ -7109,11 +7251,6 @@ bool SDNode::hasPredecessor(const SDNode *N) const { return hasPredecessorHelper(N, Visited, Worklist); } -uint64_t SDNode::getConstantOperandVal(unsigned Num) const { - assert(Num < NumOperands && "Invalid child # of SDNode!"); - return cast(OperandList[Num])->getZExtValue(); -} - const SDNodeFlags *SDNode::getFlags() const { if (auto *FlagsNode = dyn_cast(this)) return &FlagsNode->Flags; @@ -7384,7 +7521,7 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, unsigned BitPos = j * EltBitSize; if (OpVal.isUndef()) - SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize); + SplatUndef.setBits(BitPos, BitPos + EltBitSize); else if (ConstantSDNode *CN = dyn_cast(OpVal)) SplatValue |= CN->getAPIntValue().zextOrTrunc(EltBitSize). 
zextOrTrunc(sz) << BitPos; @@ -7517,6 +7654,16 @@ SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) { return nullptr; } +SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) { + if (isa(N)) + return N.getNode(); + + if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode())) + return N.getNode(); + + return nullptr; +} + #ifndef NDEBUG static void checkForCyclesHelper(const SDNode *N, SmallPtrSetImpl &Visited, diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 6e8bed1..36e0243 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -40,6 +40,7 @@ #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" @@ -647,10 +648,6 @@ RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI, } } -/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from -/// this value and returns the result as a ValueVT value. This uses -/// Chain/Flag as the input and updates them for the output Chain/Flag. -/// If the Flag pointer is NULL, no flag is used. SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, const SDLoc &dl, SDValue &Chain, @@ -752,10 +749,6 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values); } -/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the -/// specified value into the registers specified by this object. This uses -/// Chain/Flag as the input and updates them for the output Chain/Flag. -/// If the Flag pointer is NULL, no flag is used. void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, const SDLoc &dl, SDValue &Chain, SDValue *Flag, const Value *V, @@ -809,9 +802,6 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); } -/// AddInlineAsmOperands - Add this value to the specified inlineasm node -/// operand list. This adds the code marker and includes the number of -/// values added into it. void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, unsigned MatchingIdx, const SDLoc &dl, SelectionDAG &DAG, @@ -863,12 +853,6 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, LPadToCallSiteMap.clear(); } -/// clear - Clear out the current SelectionDAG and the associated -/// state and prepare this SelectionDAGBuilder object to be used -/// for a new block. This doesn't clear out information about -/// additional blocks that are needed to complete switch lowering -/// or PHI node updating; that information is cleared out as it is -/// consumed. void SelectionDAGBuilder::clear() { NodeMap.clear(); UnusedArgNodeMap.clear(); @@ -880,21 +864,10 @@ void SelectionDAGBuilder::clear() { StatepointLowering.clear(); } -/// clearDanglingDebugInfo - Clear the dangling debug information -/// map. This function is separated from the clear so that debug -/// information that is dangling in a basic block can be properly -/// resolved in a different basic block. This allows the -/// SelectionDAG to resolve dangling debug information attached -/// to PHI nodes. 
void SelectionDAGBuilder::clearDanglingDebugInfo() { DanglingDebugInfoMap.clear(); } -/// getRoot - Return the current virtual root of the Selection DAG, -/// flushing any PendingLoad items. This must be done before emitting -/// a store or any other node that may need to be ordered after any -/// prior load instructions. -/// SDValue SelectionDAGBuilder::getRoot() { if (PendingLoads.empty()) return DAG.getRoot(); @@ -914,10 +887,6 @@ SDValue SelectionDAGBuilder::getRoot() { return Root; } -/// getControlRoot - Similar to getRoot, but instead of flushing all the -/// PendingLoad items, flush all the PendingExports items. It is necessary -/// to do this before emitting a terminator instruction. -/// SDValue SelectionDAGBuilder::getControlRoot() { SDValue Root = DAG.getRoot(); @@ -950,7 +919,9 @@ void SelectionDAGBuilder::visit(const Instruction &I) { HandlePHINodesInSuccessorBlocks(I.getParent()); } - ++SDNodeOrder; + // Increase the SDNodeOrder if dealing with a non-debug instruction. + if (!isa<DbgInfoIntrinsic>(I)) + ++SDNodeOrder; CurInst = &I; @@ -1604,7 +1575,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, BranchProbability TProb, - BranchProbability FProb) { + BranchProbability FProb, + bool InvertCond) { const BasicBlock *BB = CurBB->getBasicBlock(); // If the leaf of the tree is a comparison, merge the condition into @@ -1618,10 +1590,14 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, isExportableFromCurrentBlock(BOp->getOperand(1), BB))) { ISD::CondCode Condition; if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) { - Condition = getICmpCondCode(IC->getPredicate()); + ICmpInst::Predicate Pred = + InvertCond ? IC->getInversePredicate() : IC->getPredicate(); + Condition = getICmpCondCode(Pred); } else { const FCmpInst *FC = cast<FCmpInst>(Cond); - Condition = getFCmpCondCode(FC->getPredicate()); + FCmpInst::Predicate Pred = + InvertCond ? FC->getInversePredicate() : FC->getPredicate(); + Condition = getFCmpCondCode(Pred); if (TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); } @@ -1634,7 +1610,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, } // Create a CaseBlock record representing this branch. - CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()), + ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ; + CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()), nullptr, TBB, FBB, CurBB, TProb, FProb); SwitchCases.push_back(CB); } @@ -1647,16 +1624,44 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, MachineBasicBlock *SwitchBB, Instruction::BinaryOps Opc, BranchProbability TProb, - BranchProbability FProb) { - // If this node is not part of the or/and tree, emit it as a branch. + BranchProbability FProb, + bool InvertCond) { + // Skip over nodes that are not part of the tree and remember to invert the + // op and operands at the next level. + if (BinaryOperator::isNot(Cond) && Cond->hasOneUse()) { + const Value *CondOp = BinaryOperator::getNotArgument(Cond); + if (InBlock(CondOp, CurBB->getBasicBlock())) { + FindMergedConditions(CondOp, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb, + !InvertCond); + return; + } + } + const Instruction *BOp = dyn_cast<Instruction>(Cond); + // Compute the effective opcode for Cond, taking into account whether it needs + // to be inverted, e.g.
+ // and (not (or A, B)), C + // gets lowered as + // and (and (not A, not B), C) + unsigned BOpc = 0; + if (BOp) { + BOpc = BOp->getOpcode(); + if (InvertCond) { + if (BOpc == Instruction::And) + BOpc = Instruction::Or; + else if (BOpc == Instruction::Or) + BOpc = Instruction::And; + } + } + + // If this node is not part of the or/and tree, emit it as a branch. if (!BOp || !(isa(BOp) || isa(BOp)) || - (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() || + BOpc != Opc || !BOp->hasOneUse() || BOp->getParent() != CurBB->getBasicBlock() || !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, - TProb, FProb); + TProb, FProb, InvertCond); return; } @@ -1691,14 +1696,14 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, auto NewFalseProb = TProb / 2 + FProb; // Emit the LHS condition. FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc, - NewTrueProb, NewFalseProb); + NewTrueProb, NewFalseProb, InvertCond); // Normalize A/2 and B to get A/(1+B) and 2B/(1+B). SmallVector Probs{TProb / 2, FProb}; BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); // Emit the RHS condition into TmpBB. FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, - Probs[0], Probs[1]); + Probs[0], Probs[1], InvertCond); } else { assert(Opc == Instruction::And && "Unknown merge op!"); // Codegen X & Y as: @@ -1724,14 +1729,14 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, auto NewFalseProb = FProb / 2; // Emit the LHS condition. FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc, - NewTrueProb, NewFalseProb); + NewTrueProb, NewFalseProb, InvertCond); // Normalize A and B/2 to get 2A/(1+A) and B/(1+A). SmallVector Probs{TProb, FProb / 2}; BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); // Emit the RHS condition into TmpBB. FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, - Probs[0], Probs[1]); + Probs[0], Probs[1], InvertCond); } } @@ -1815,7 +1820,8 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode, getEdgeProbability(BrMBB, Succ0MBB), - getEdgeProbability(BrMBB, Succ1MBB)); + getEdgeProbability(BrMBB, Succ1MBB), + /*InvertCond=*/false); // If the compares in later blocks need to use values not currently // exported from this block, export them now. This block should always // be the first entry. @@ -2936,7 +2942,7 @@ void SelectionDAGBuilder::visitBitCast(const User &I) { DestVT, N)); // convert types. // Check if the original LLVM IR Operand was a ConstantInt, because getValue() // might fold any kind of constant expression to an integer constant and that - // is not what we are looking for. Only regcognize a bitcast of a genuine + // is not what we are looking for. Only recognize a bitcast of a genuine // constant integer as an opaque constant. else if(ConstantInt *C = dyn_cast(I.getOperand(0))) setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false, @@ -3089,14 +3095,10 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { if (SrcNumElts > MaskNumElts) { // Analyze the access pattern of the vector to see if we can extract - // two subvectors and do the shuffle. The analysis is done by calculating - // the range of elements the mask access on both vectors. 
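The InvertCond threading in FindMergedConditions above rests on De Morgan's laws: a not on an inner node is absorbed by inverting the opcode and each leaf, so and(not(or(A, B)), C) is emitted as and(and(not A, not B), C) without materializing the not. A brute-force check of that identity:

#include <cassert>

int main() {
  for (int A = 0; A != 2; ++A)
    for (int B = 0; B != 2; ++B)
      for (int C = 0; C != 2; ++C)
        assert(((!(A || B)) && C) == ((!A && !B) && C));
}

At a leaf, the inversion is applied with getInversePredicate on the compare, or by emitting the CaseBlock with SETNE instead of SETEQ.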
- int MinRange[2] = { static_cast(SrcNumElts), - static_cast(SrcNumElts)}; - int MaxRange[2] = {-1, -1}; - - for (unsigned i = 0; i != MaskNumElts; ++i) { - int Idx = Mask[i]; + // two subvectors and do the shuffle. + int StartIdx[2] = { -1, -1 }; // StartIdx to extract from + bool CanExtract = true; + for (int Idx : Mask) { unsigned Input = 0; if (Idx < 0) continue; @@ -3105,41 +3107,28 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { Input = 1; Idx -= SrcNumElts; } - if (Idx > MaxRange[Input]) - MaxRange[Input] = Idx; - if (Idx < MinRange[Input]) - MinRange[Input] = Idx; - } - // Check if the access is smaller than the vector size and can we find - // a reasonable extract index. - int RangeUse[2] = { -1, -1 }; // 0 = Unused, 1 = Extract, -1 = Can not - // Extract. - int StartIdx[2]; // StartIdx to extract from - for (unsigned Input = 0; Input < 2; ++Input) { - if (MinRange[Input] >= (int)SrcNumElts && MaxRange[Input] < 0) { - RangeUse[Input] = 0; // Unused - StartIdx[Input] = 0; - continue; - } - - // Find a good start index that is a multiple of the mask length. Then - // see if the rest of the elements are in range. - StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts; - if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts && - StartIdx[Input] + MaskNumElts <= SrcNumElts) - RangeUse[Input] = 1; // Extract from a multiple of the mask length. + // If all the indices come from the same MaskNumElts sized portion of + // the sources we can use extract. Also make sure the extract wouldn't + // extract past the end of the source. + int NewStartIdx = alignDown(Idx, MaskNumElts); + if (NewStartIdx + MaskNumElts > SrcNumElts || + (StartIdx[Input] >= 0 && StartIdx[Input] != NewStartIdx)) + CanExtract = false; + // Make sure we always update StartIdx as we use it to track if all + // elements are undef. + StartIdx[Input] = NewStartIdx; } - if (RangeUse[0] == 0 && RangeUse[1] == 0) { + if (StartIdx[0] < 0 && StartIdx[1] < 0) { setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used. return; } - if (RangeUse[0] >= 0 && RangeUse[1] >= 0) { + if (CanExtract) { // Extract appropriate subvector and generate a vector shuffle for (unsigned Input = 0; Input < 2; ++Input) { SDValue &Src = Input == 0 ? Src1 : Src2; - if (RangeUse[Input] == 0) + if (StartIdx[Input] < 0) Src = DAG.getUNDEF(VT); else { Src = DAG.getNode( @@ -3150,16 +3139,12 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { } // Calculate new mask. 
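The rewritten shuffle analysis above replaces the min/max range scan with a single rule: extraction works only if every defined mask index lands in the same MaskNumElts-aligned window of its source, and that window must not run off the end. A standalone rerun of the check on a concrete mask (toy values, not the SelectionDAG types):

#include <cassert>

int main() {
  const int MaskNumElts = 4, SrcNumElts = 8;
  const int Mask[MaskNumElts] = {5, 4, -1, 6}; // all defined lanes in [4, 8)
  int StartIdx = -1;
  bool CanExtract = true;
  for (int Idx : Mask) {
    if (Idx < 0)
      continue; // undef lanes impose no constraint
    int NewStartIdx = (Idx / MaskNumElts) * MaskNumElts; // alignDown
    if (NewStartIdx + MaskNumElts > SrcNumElts ||
        (StartIdx >= 0 && StartIdx != NewStartIdx))
      CanExtract = false;
    StartIdx = NewStartIdx; // always updated, so -1 still means all-undef
  }
  assert(CanExtract && StartIdx == 4);
  // The mask is then rebased against the extracted subvector:
  // {5, 4, -1, 6} - StartIdx => {1, 0, -1, 2}.
}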
- SmallVector MappedOps; - for (unsigned i = 0; i != MaskNumElts; ++i) { - int Idx = Mask[i]; - if (Idx >= 0) { - if (Idx < (int)SrcNumElts) - Idx -= StartIdx[0]; - else - Idx -= SrcNumElts + StartIdx[1] - MaskNumElts; - } - MappedOps.push_back(Idx); + SmallVector MappedOps(Mask.begin(), Mask.end()); + for (int &Idx : MappedOps) { + if (Idx >= (int)SrcNumElts) + Idx -= SrcNumElts + StartIdx[1] - MaskNumElts; + else if (Idx >= 0) + Idx -= StartIdx[0]; } setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps)); @@ -3173,8 +3158,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { EVT EltVT = VT.getVectorElementType(); EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); SmallVector Ops; - for (unsigned i = 0; i != MaskNumElts; ++i) { - int Idx = Mask[i]; + for (int Idx : Mask) { SDValue Res; if (Idx < 0) { @@ -3303,7 +3287,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // N = N + Offset uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field); - // In an inbouds GEP with an offset that is nonnegative even when + // In an inbounds GEP with an offset that is nonnegative even when // interpreted as signed, assume there is no unsigned overflow. SDNodeFlags Flags; if (int64_t(Offset) >= 0 && cast(I).isInBounds()) @@ -4774,7 +4758,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( else FuncInfo.ArgDbgValues.push_back( BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE)) - .addOperand(*Op) + .add(*Op) .addImm(Offset) .addMetadata(Variable) .addMetadata(Expr)); @@ -4786,7 +4770,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N, DILocalVariable *Variable, DIExpression *Expr, int64_t Offset, - DebugLoc dl, + const DebugLoc &dl, unsigned DbgSDNodeOrder) { SDDbgValue *SDV; auto *FISDN = dyn_cast(N.getNode()); @@ -4816,9 +4800,9 @@ SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N, # define setjmp_undefined_for_msvc #endif -/// visitIntrinsicCall - Lower the call to the specified intrinsic function. If -/// we want to emit this as a call to a named external function, return the name -/// otherwise lower it and return null. +/// Lower the call to the specified intrinsic function. If we want to emit this +/// as a call to a named external function, return the name. Otherwise, lower it +/// and return null. const char * SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -4919,6 +4903,51 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { updateDAGForMaybeTailCall(MM); return nullptr; } + case Intrinsic::memcpy_element_atomic: { + SDValue Dst = getValue(I.getArgOperand(0)); + SDValue Src = getValue(I.getArgOperand(1)); + SDValue NumElements = getValue(I.getArgOperand(2)); + SDValue ElementSize = getValue(I.getArgOperand(3)); + + // Emit a library call. 
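The memcpy_element_atomic lowering that begins above hands the whole operation to a runtime routine selected by element size (the RTLIB lookup just below). A sketch of the contract such a routine provides, under assumed semantics: the copy as a whole is not atomic, but each element access is. This is not the actual compiler-rt implementation, and it uses GCC/Clang __atomic builtins:

#include <cassert>
#include <cstdint>

void memcpyElementAtomic4(uint32_t *Dst, const uint32_t *Src,
                          uint64_t NumElements) {
  for (uint64_t i = 0; i != NumElements; ++i) {
    // Each 4-byte element is read and written atomically (relaxed order).
    uint32_t V = __atomic_load_n(&Src[i], __ATOMIC_RELAXED);
    __atomic_store_n(&Dst[i], V, __ATOMIC_RELAXED);
  }
}

int main() {
  uint32_t Src[3] = {1, 2, 3}, Dst[3] = {0, 0, 0};
  memcpyElementAtomic4(Dst, Src, 3);
  assert(Dst[0] == 1 && Dst[1] == 2 && Dst[2] == 3);
}

This is also why an unsupported element size is a hard error below: one element's access cannot legally be split into smaller atomic pieces.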
+ TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + + Entry.Node = Src; + Args.push_back(Entry); + + Entry.Ty = I.getArgOperand(2)->getType(); + Entry.Node = NumElements; + Args.push_back(Entry); + + Entry.Ty = Type::getInt32Ty(*DAG.getContext()); + Entry.Node = ElementSize; + Args.push_back(Entry); + + uint64_t ElementSizeConstant = + cast(I.getArgOperand(3))->getZExtValue(); + RTLIB::Libcall LibraryCall = + RTLIB::getMEMCPY_ELEMENT_ATOMIC(ElementSizeConstant); + if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) + report_fatal_error("Unsupported element size"); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(sdl) + .setChain(getRoot()) + .setCallee(TLI.getLibcallCallingConv(LibraryCall), + Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol( + TLI.getLibcallName(LibraryCall), + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args)); + + std::pair CallResult = TLI.LowerCallTo(CLI); + DAG.setRoot(CallResult.second); + return nullptr; + } case Intrinsic::dbg_declare: { const DbgDeclareInst &DI = cast(I); DILocalVariable *Variable = DI.getVariable(); @@ -5188,39 +5217,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, Res); return nullptr; } - case Intrinsic::convertff: - case Intrinsic::convertfsi: - case Intrinsic::convertfui: - case Intrinsic::convertsif: - case Intrinsic::convertuif: - case Intrinsic::convertss: - case Intrinsic::convertsu: - case Intrinsic::convertus: - case Intrinsic::convertuu: { - ISD::CvtCode Code = ISD::CVT_INVALID; - switch (Intrinsic) { - default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. - case Intrinsic::convertff: Code = ISD::CVT_FF; break; - case Intrinsic::convertfsi: Code = ISD::CVT_FS; break; - case Intrinsic::convertfui: Code = ISD::CVT_FU; break; - case Intrinsic::convertsif: Code = ISD::CVT_SF; break; - case Intrinsic::convertuif: Code = ISD::CVT_UF; break; - case Intrinsic::convertss: Code = ISD::CVT_SS; break; - case Intrinsic::convertsu: Code = ISD::CVT_SU; break; - case Intrinsic::convertus: Code = ISD::CVT_US; break; - case Intrinsic::convertuu: Code = ISD::CVT_UU; break; - } - EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); - const Value *Op1 = I.getArgOperand(0); - Res = DAG.getConvertRndSat(DestVT, sdl, getValue(Op1), - DAG.getValueType(DestVT), - DAG.getValueType(getValue(Op1).getValueType()), - getValue(I.getArgOperand(1)), - getValue(I.getArgOperand(2)), - Code); - setValue(&I, Res); - return nullptr; - } case Intrinsic::powi: setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG)); @@ -5311,6 +5307,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)))); return nullptr; + case Intrinsic::experimental_constrained_fadd: + case Intrinsic::experimental_constrained_fsub: + case Intrinsic::experimental_constrained_fmul: + case Intrinsic::experimental_constrained_fdiv: + case Intrinsic::experimental_constrained_frem: + visitConstrainedFPIntrinsic(I, Intrinsic); + return nullptr; case Intrinsic::fmuladd: { EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && @@ -5759,6 +5762,46 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } } +void 
SelectionDAGBuilder::visitConstrainedFPIntrinsic(const CallInst &I, + unsigned Intrinsic) { + SDLoc sdl = getCurSDLoc(); + unsigned Opcode; + switch (Intrinsic) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::experimental_constrained_fadd: + Opcode = ISD::STRICT_FADD; + break; + case Intrinsic::experimental_constrained_fsub: + Opcode = ISD::STRICT_FSUB; + break; + case Intrinsic::experimental_constrained_fmul: + Opcode = ISD::STRICT_FMUL; + break; + case Intrinsic::experimental_constrained_fdiv: + Opcode = ISD::STRICT_FDIV; + break; + case Intrinsic::experimental_constrained_frem: + Opcode = ISD::STRICT_FREM; + break; + } + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Chain = getRoot(); + SDValue Ops[3] = { Chain, getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)) }; + SmallVector ValueVTs; + ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs); + ValueVTs.push_back(MVT::Other); // Out chain + + SDVTList VTs = DAG.getVTList(ValueVTs); + SDValue Result = DAG.getNode(Opcode, sdl, VTs, Ops); + + assert(Result.getNode()->getNumValues() == 2); + SDValue OutChain = Result.getValue(1); + DAG.setRoot(OutChain); + SDValue FPResult = Result.getValue(0); + setValue(&I, FPResult); +} + std::pair SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, const BasicBlock *EHPadBB) { @@ -5842,6 +5885,15 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, const Value *SwiftErrorVal = nullptr; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // We can't tail call inside a function with a swifterror argument. Lowering + // does not support this yet. It would have to move into the swifterror + // register before the call. + auto *Caller = CS.getInstruction()->getParent()->getParent(); + if (TLI.supportSwiftError() && + Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) + isTailCall = false; + for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { const Value *V = *i; @@ -5913,8 +5965,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, } } -/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the -/// value is equal or not-equal to zero. +/// Return true if it only matters that the value is equal or not-equal to zero. static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) { for (const User *U : V->users()) { if (const ICmpInst *IC = dyn_cast(U)) @@ -5968,8 +6019,8 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, return LoadVal; } -/// processIntegerCallValue - Record the value for an instruction that -/// produces an integer result, converting the type where necessary. +/// Record the value for an instruction that produces an integer result, +/// converting the type where necessary. void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I, SDValue Value, bool IsSigned) { @@ -5982,20 +6033,13 @@ void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I, setValue(&I, Value); } -/// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form. -/// If so, return true and lower it, otherwise return false and it will be -/// lowered like a normal call. +/// See if we can lower a memcmp call into an optimized form. If so, return +/// true and lower it. Otherwise return false, and it will be lowered like a +/// normal call. 
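The constrained-FP lowering above chains every STRICT_F* node through the DAG root because, once the rounding mode or exception state is observable, the usual reordering and folding of FP nodes becomes unsound. A host-side illustration of the underlying fact (requires a C library honoring fesetround; strictly this also needs FENV_ACCESS support, so treat it as illustrative):

#include <cassert>
#include <cfenv>

int main() {
  volatile double a = 1.0, b = 3.0; // volatile blocks constant folding
  std::fesetround(FE_DOWNWARD);
  double down = a / b;
  std::fesetround(FE_UPWARD);
  double up = a / b;
  std::fesetround(FE_TONEAREST);
  assert(down < up); // the same expression yields different values
}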
+/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { - // Verify that the prototype makes sense. int memcmp(void*,void*,size_t) - if (I.getNumArgOperands() != 3) - return false; - const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1); - if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() || - !I.getArgOperand(2)->getType()->isIntegerTy() || - !I.getType()->isIntegerTy()) - return false; - const Value *Size = I.getArgOperand(2); const ConstantInt *CSize = dyn_cast(Size); if (CSize && CSize->getZExtValue() == 0) { @@ -6006,11 +6050,9 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { } const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); - std::pair Res = - TSI.EmitTargetCodeForMemcmp(DAG, getCurSDLoc(), DAG.getRoot(), - getValue(LHS), getValue(RHS), getValue(Size), - MachinePointerInfo(LHS), - MachinePointerInfo(RHS)); + std::pair Res = TSI.EmitTargetCodeForMemcmp( + DAG, getCurSDLoc(), DAG.getRoot(), getValue(LHS), getValue(RHS), + getValue(Size), MachinePointerInfo(LHS), MachinePointerInfo(RHS)); if (Res.first.getNode()) { processIntegerCallValue(I, Res.first, true); PendingLoads.push_back(Res.second); @@ -6019,88 +6061,72 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0 // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0 - if (CSize && IsOnlyUsedInZeroEqualityComparison(&I)) { - bool ActuallyDoIt = true; - MVT LoadVT; - Type *LoadTy; - switch (CSize->getZExtValue()) { - default: - LoadVT = MVT::Other; - LoadTy = nullptr; - ActuallyDoIt = false; - break; - case 2: - LoadVT = MVT::i16; - LoadTy = Type::getInt16Ty(CSize->getContext()); - break; - case 4: - LoadVT = MVT::i32; - LoadTy = Type::getInt32Ty(CSize->getContext()); - break; - case 8: - LoadVT = MVT::i64; - LoadTy = Type::getInt64Ty(CSize->getContext()); - break; - /* - case 16: - LoadVT = MVT::v4i32; - LoadTy = Type::getInt32Ty(CSize->getContext()); - LoadTy = VectorType::get(LoadTy, 4); - break; - */ - } - - // This turns into unaligned loads. We only do this if the target natively - // supports the MVT we'll be loading or if it is small enough (<= 4) that - // we'll only produce a small number of byte loads. + if (!CSize || !IsOnlyUsedInZeroEqualityComparison(&I)) + return false; - // Require that we can find a legal MVT, and only do this if the target - // supports unaligned loads of that type. Expanding into byte loads would - // bloat the code. - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (ActuallyDoIt && CSize->getZExtValue() > 4) { - unsigned DstAS = LHS->getType()->getPointerAddressSpace(); - unsigned SrcAS = RHS->getType()->getPointerAddressSpace(); - // TODO: Handle 5 byte compare as 4-byte + 1 byte. - // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads. - // TODO: Check alignment of src and dest ptrs. 
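The memcmp fold described in the comments above works because equality of four bytes is exactly equality of one 32-bit word. A portable model of the loads it emits (memcpy being the standard-C++ spelling of a possibly-unaligned load):

#include <cassert>
#include <cstdint>
#include <cstring>

bool memcmp4IsNotEqual(const void *LHS, const void *RHS) {
  uint32_t L, R;
  std::memcpy(&L, LHS, 4);
  std::memcpy(&R, RHS, 4);
  return L != R;
}

int main() {
  const char A[4] = {'a', 'b', 'c', 'd'};
  const char B[4] = {'a', 'b', 'c', 'e'};
  assert(memcmp4IsNotEqual(A, B));
  assert(!memcmp4IsNotEqual(A, A));
}

Note the transform recovers only equality: on a little-endian target the sign of memcmp's result is not the sign of the word comparison, which is why IsOnlyUsedInZeroEqualityComparison gates the whole thing.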
- if (!TLI.isTypeLegal(LoadVT) || - !TLI.allowsMisalignedMemoryAccesses(LoadVT, SrcAS) || - !TLI.allowsMisalignedMemoryAccesses(LoadVT, DstAS)) - ActuallyDoIt = false; - } + MVT LoadVT; + Type *LoadTy; + switch (CSize->getZExtValue()) { + default: + return false; + case 2: + LoadVT = MVT::i16; + LoadTy = Type::getInt16Ty(CSize->getContext()); + break; + case 4: + LoadVT = MVT::i32; + LoadTy = Type::getInt32Ty(CSize->getContext()); + break; + case 8: + LoadVT = MVT::i64; + LoadTy = Type::getInt64Ty(CSize->getContext()); + break; + /* + case 16: + LoadVT = MVT::v4i32; + LoadTy = Type::getInt32Ty(CSize->getContext()); + LoadTy = VectorType::get(LoadTy, 4); + break; + */ + } - if (ActuallyDoIt) { - SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this); - SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this); + // This turns into unaligned loads. We only do this if the target natively + // supports the MVT we'll be loading or if it is small enough (<= 4) that + // we'll only produce a small number of byte loads. - SDValue Res = DAG.getSetCC(getCurSDLoc(), MVT::i1, LHSVal, RHSVal, - ISD::SETNE); - processIntegerCallValue(I, Res, false); - return true; - } + // Require that we can find a legal MVT, and only do this if the target + // supports unaligned loads of that type. Expanding into byte loads would + // bloat the code. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (CSize->getZExtValue() > 4) { + unsigned DstAS = LHS->getType()->getPointerAddressSpace(); + unsigned SrcAS = RHS->getType()->getPointerAddressSpace(); + // TODO: Handle 5 byte compare as 4-byte + 1 byte. + // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads. + // TODO: Check alignment of src and dest ptrs. + if (!TLI.isTypeLegal(LoadVT) || + !TLI.allowsMisalignedMemoryAccesses(LoadVT, SrcAS) || + !TLI.allowsMisalignedMemoryAccesses(LoadVT, DstAS)) + return false; } - - return false; + SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this); + SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this); + SDValue SetCC = + DAG.getSetCC(getCurSDLoc(), MVT::i1, LHSVal, RHSVal, ISD::SETNE); + processIntegerCallValue(I, SetCC, false); + return true; } -/// visitMemChrCall -- See if we can lower a memchr call into an optimized -/// form. If so, return true and lower it, otherwise return false and it -/// will be lowered like a normal call. +/// See if we can lower a memchr call into an optimized form. If so, return +/// true and lower it. Otherwise return false, and it will be lowered like a +/// normal call. +/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) { - // Verify that the prototype makes sense. void *memchr(void *, int, size_t) - if (I.getNumArgOperands() != 3) - return false; - const Value *Src = I.getArgOperand(0); const Value *Char = I.getArgOperand(1); const Value *Length = I.getArgOperand(2); - if (!Src->getType()->isPointerTy() || - !Char->getType()->isIntegerTy() || - !Length->getType()->isIntegerTy() || - !I.getType()->isPointerTy()) - return false; const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair Res = @@ -6116,15 +6142,12 @@ bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) { return false; } -/// -/// visitMemPCpyCall -- lower a mempcpy call as a memcpy followed by code to -/// to adjust the dst pointer by the size of the copied memory. +/// See if we can lower a mempcpy call into an optimized form. 
If so, return +/// true and lower it. Otherwise return false, and it will be lowered like a +/// normal call. +/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) { - - // Verify argument count: void *mempcpy(void *, const void *, size_t) - if (I.getNumArgOperands() != 3) - return false; - SDValue Dst = getValue(I.getArgOperand(0)); SDValue Src = getValue(I.getArgOperand(1)); SDValue Size = getValue(I.getArgOperand(2)); @@ -6159,19 +6182,13 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) { return true; } -/// visitStrCpyCall -- See if we can lower a strcpy or stpcpy call into an -/// optimized form. If so, return true and lower it, otherwise return false -/// and it will be lowered like a normal call. +/// See if we can lower a strcpy call into an optimized form. If so, return +/// true and lower it, otherwise return false and it will be lowered like a +/// normal call. +/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) { - // Verify that the prototype makes sense. char *strcpy(char *, char *) - if (I.getNumArgOperands() != 2) - return false; - const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); - if (!Arg0->getType()->isPointerTy() || - !Arg1->getType()->isPointerTy() || - !I.getType()->isPointerTy()) - return false; const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair Res = @@ -6188,19 +6205,13 @@ bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) { return false; } -/// visitStrCmpCall - See if we can lower a call to strcmp in an optimized form. -/// If so, return true and lower it, otherwise return false and it will be -/// lowered like a normal call. +/// See if we can lower a strcmp call into an optimized form. If so, return +/// true and lower it, otherwise return false and it will be lowered like a +/// normal call. +/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) { - // Verify that the prototype makes sense. int strcmp(void*,void*) - if (I.getNumArgOperands() != 2) - return false; - const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); - if (!Arg0->getType()->isPointerTy() || - !Arg1->getType()->isPointerTy() || - !I.getType()->isIntegerTy()) - return false; const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair Res = @@ -6217,17 +6228,13 @@ bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) { return false; } -/// visitStrLenCall -- See if we can lower a strlen call into an optimized -/// form. If so, return true and lower it, otherwise return false and it -/// will be lowered like a normal call. +/// See if we can lower a strlen call into an optimized form. If so, return +/// true and lower it, otherwise return false and it will be lowered like a +/// normal call. +/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) { - // Verify that the prototype makes sense. 
size_t strlen(char *) - if (I.getNumArgOperands() != 1) - return false; - const Value *Arg0 = I.getArgOperand(0); - if (!Arg0->getType()->isPointerTy() || !I.getType()->isIntegerTy()) - return false; const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair Res = @@ -6242,19 +6249,13 @@ bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) { return false; } -/// visitStrNLenCall -- See if we can lower a strnlen call into an optimized -/// form. If so, return true and lower it, otherwise return false and it -/// will be lowered like a normal call. +/// See if we can lower a strnlen call into an optimized form. If so, return +/// true and lower it, otherwise return false and it will be lowered like a +/// normal call. +/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) { - // Verify that the prototype makes sense. size_t strnlen(char *, size_t) - if (I.getNumArgOperands() != 2) - return false; - const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); - if (!Arg0->getType()->isPointerTy() || - !Arg1->getType()->isIntegerTy() || - !I.getType()->isIntegerTy()) - return false; const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair Res = @@ -6270,16 +6271,15 @@ bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) { return false; } -/// visitUnaryFloatCall - If a call instruction is a unary floating-point -/// operation (as expected), translate it to an SDNode with the specified opcode -/// and return true. +/// See if we can lower a unary floating-point operation into an SDNode with +/// the specified Opcode. If so, return true and lower it, otherwise return +/// false and it will be lowered like a normal call. +/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I, unsigned Opcode) { - // Sanity check that it really is a unary floating-point call. - if (I.getNumArgOperands() != 1 || - !I.getArgOperand(0)->getType()->isFloatingPointTy() || - I.getType() != I.getArgOperand(0)->getType() || - !I.onlyReadsMemory()) + // We already checked this call's prototype; verify it doesn't modify errno. + if (!I.onlyReadsMemory()) return false; SDValue Tmp = getValue(I.getArgOperand(0)); @@ -6287,17 +6287,15 @@ bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I, return true; } -/// visitBinaryFloatCall - If a call instruction is a binary floating-point -/// operation (as expected), translate it to an SDNode with the specified opcode -/// and return true. +/// See if we can lower a binary floating-point operation into an SDNode with +/// the specified Opcode. If so, return true and lower it. Otherwise return +/// false, and it will be lowered like a normal call. +/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I, unsigned Opcode) { - // Sanity check that it really is a binary floating-point call. - if (I.getNumArgOperands() != 2 || - !I.getArgOperand(0)->getType()->isFloatingPointTy() || - I.getType() != I.getArgOperand(0)->getType() || - I.getType() != I.getArgOperand(1)->getType() || - !I.onlyReadsMemory()) + // We already checked this call's prototype; verify it doesn't modify errno. 
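The onlyReadsMemory checks that survive above are not redundant with the prototype check: a libm call such as sqrt may write errno on a domain error, and lowering it to a bare FSQRT node would silently drop that store. A host demonstration of the side effect (behavior depends on math_errhandling, hence the guard):

#include <cassert>
#include <cerrno>
#include <cmath>

int main() {
  volatile double x = -1.0; // volatile keeps the call from being folded away
  errno = 0;
  double r = std::sqrt(x);  // domain error
  assert(std::isnan(r));
  if (math_errhandling & MATH_ERRNO)
    assert(errno == EDOM);  // sqrt behaved as an errno-writing call
}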
+ if (!I.onlyReadsMemory()) return false; SDValue Tmp0 = getValue(I.getArgOperand(0)); @@ -6337,20 +6335,18 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { // Check for well-known libc/libm calls. If the function is internal, it // can't be a library call. Don't do the check if marked as nobuiltin for // some reason. - LibFunc::Func Func; + LibFunc Func; if (!I.isNoBuiltin() && !F->hasLocalLinkage() && F->hasName() && - LibInfo->getLibFunc(F->getName(), Func) && + LibInfo->getLibFunc(*F, Func) && LibInfo->hasOptimizedCodeGen(Func)) { switch (Func) { default: break; - case LibFunc::copysign: - case LibFunc::copysignf: - case LibFunc::copysignl: - if (I.getNumArgOperands() == 2 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType() && - I.getType() == I.getArgOperand(1)->getType() && - I.onlyReadsMemory()) { + case LibFunc_copysign: + case LibFunc_copysignf: + case LibFunc_copysignl: + // We already checked this call's prototype; verify it doesn't modify + // errno. + if (I.onlyReadsMemory()) { SDValue LHS = getValue(I.getArgOperand(0)); SDValue RHS = getValue(I.getArgOperand(1)); setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurSDLoc(), @@ -6358,122 +6354,122 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { return; } break; - case LibFunc::fabs: - case LibFunc::fabsf: - case LibFunc::fabsl: + case LibFunc_fabs: + case LibFunc_fabsf: + case LibFunc_fabsl: if (visitUnaryFloatCall(I, ISD::FABS)) return; break; - case LibFunc::fmin: - case LibFunc::fminf: - case LibFunc::fminl: + case LibFunc_fmin: + case LibFunc_fminf: + case LibFunc_fminl: if (visitBinaryFloatCall(I, ISD::FMINNUM)) return; break; - case LibFunc::fmax: - case LibFunc::fmaxf: - case LibFunc::fmaxl: + case LibFunc_fmax: + case LibFunc_fmaxf: + case LibFunc_fmaxl: if (visitBinaryFloatCall(I, ISD::FMAXNUM)) return; break; - case LibFunc::sin: - case LibFunc::sinf: - case LibFunc::sinl: + case LibFunc_sin: + case LibFunc_sinf: + case LibFunc_sinl: if (visitUnaryFloatCall(I, ISD::FSIN)) return; break; - case LibFunc::cos: - case LibFunc::cosf: - case LibFunc::cosl: + case LibFunc_cos: + case LibFunc_cosf: + case LibFunc_cosl: if (visitUnaryFloatCall(I, ISD::FCOS)) return; break; - case LibFunc::sqrt: - case LibFunc::sqrtf: - case LibFunc::sqrtl: - case LibFunc::sqrt_finite: - case LibFunc::sqrtf_finite: - case LibFunc::sqrtl_finite: + case LibFunc_sqrt: + case LibFunc_sqrtf: + case LibFunc_sqrtl: + case LibFunc_sqrt_finite: + case LibFunc_sqrtf_finite: + case LibFunc_sqrtl_finite: if (visitUnaryFloatCall(I, ISD::FSQRT)) return; break; - case LibFunc::floor: - case LibFunc::floorf: - case LibFunc::floorl: + case LibFunc_floor: + case LibFunc_floorf: + case LibFunc_floorl: if (visitUnaryFloatCall(I, ISD::FFLOOR)) return; break; - case LibFunc::nearbyint: - case LibFunc::nearbyintf: - case LibFunc::nearbyintl: + case LibFunc_nearbyint: + case LibFunc_nearbyintf: + case LibFunc_nearbyintl: if (visitUnaryFloatCall(I, ISD::FNEARBYINT)) return; break; - case LibFunc::ceil: - case LibFunc::ceilf: - case LibFunc::ceill: + case LibFunc_ceil: + case LibFunc_ceilf: + case LibFunc_ceill: if (visitUnaryFloatCall(I, ISD::FCEIL)) return; break; - case LibFunc::rint: - case LibFunc::rintf: - case LibFunc::rintl: + case LibFunc_rint: + case LibFunc_rintf: + case LibFunc_rintl: if (visitUnaryFloatCall(I, ISD::FRINT)) return; break; - case LibFunc::round: - case LibFunc::roundf: - case LibFunc::roundl: + case LibFunc_round: + case LibFunc_roundf: + case 
LibFunc_roundl: if (visitUnaryFloatCall(I, ISD::FROUND)) return; break; - case LibFunc::trunc: - case LibFunc::truncf: - case LibFunc::truncl: + case LibFunc_trunc: + case LibFunc_truncf: + case LibFunc_truncl: if (visitUnaryFloatCall(I, ISD::FTRUNC)) return; break; - case LibFunc::log2: - case LibFunc::log2f: - case LibFunc::log2l: + case LibFunc_log2: + case LibFunc_log2f: + case LibFunc_log2l: if (visitUnaryFloatCall(I, ISD::FLOG2)) return; break; - case LibFunc::exp2: - case LibFunc::exp2f: - case LibFunc::exp2l: + case LibFunc_exp2: + case LibFunc_exp2f: + case LibFunc_exp2l: if (visitUnaryFloatCall(I, ISD::FEXP2)) return; break; - case LibFunc::memcmp: + case LibFunc_memcmp: if (visitMemCmpCall(I)) return; break; - case LibFunc::mempcpy: + case LibFunc_mempcpy: if (visitMemPCpyCall(I)) return; break; - case LibFunc::memchr: + case LibFunc_memchr: if (visitMemChrCall(I)) return; break; - case LibFunc::strcpy: + case LibFunc_strcpy: if (visitStrCpyCall(I, false)) return; break; - case LibFunc::stpcpy: + case LibFunc_stpcpy: if (visitStrCpyCall(I, true)) return; break; - case LibFunc::strcmp: + case LibFunc_strcmp: if (visitStrCmpCall(I)) return; break; - case LibFunc::strlen: + case LibFunc_strlen: if (visitStrLenCall(I)) return; break; - case LibFunc::strnlen: + case LibFunc_strnlen: if (visitStrNLenCall(I)) return; break; @@ -7317,19 +7313,23 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG, if (!Range) return Op; - Constant *Lo = cast(Range->getOperand(0))->getValue(); - if (!Lo->isNullValue()) + ConstantRange CR = getConstantRangeFromMetadata(*Range); + if (CR.isFullSet() || CR.isEmptySet() || CR.isWrappedSet()) return Op; - Constant *Hi = cast(Range->getOperand(1))->getValue(); - unsigned Bits = cast(Hi)->getValue().logBase2(); + APInt Lo = CR.getUnsignedMin(); + if (!Lo.isMinValue()) + return Op; + + APInt Hi = CR.getUnsignedMax(); + unsigned Bits = Hi.getActiveBits(); EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), Bits); SDLoc SL = getCurSDLoc(); - SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(), - Op, DAG.getValueType(SmallVT)); + SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(), Op, + DAG.getValueType(SmallVT)); unsigned NumVals = Op.getNode()->getNumValues(); if (NumVals == 1) return ZExt; @@ -7755,8 +7755,19 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Flags.setZExt(); if (Args[i].isSExt) Flags.setSExt(); - if (Args[i].isInReg) + if (Args[i].isInReg) { + // If we are using vectorcall calling convention, a structure that is + // passed InReg - is surely an HVA + if (CLI.CallConv == CallingConv::X86_VectorCall && + isa(FinalType)) { + // The first value of a structure is marked + if (0 == Value) + Flags.setHvaStart(); + Flags.setHva(); + } + // Set InReg Flag Flags.setInReg(); + } if (Args[i].isSRet) Flags.setSRet(); if (Args[i].isSwiftSelf) @@ -7996,6 +8007,173 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { return true; } +typedef DenseMap> + ArgCopyElisionMapTy; + +/// Scan the entry block of the function in FuncInfo for arguments that look +/// like copies into a local alloca. Record any copied arguments in +/// ArgCopyElisionCandidates. +static void +findArgumentCopyElisionCandidates(const DataLayout &DL, + FunctionLoweringInfo *FuncInfo, + ArgCopyElisionMapTy &ArgCopyElisionCandidates) { + // Record the state of every static alloca used in the entry block. 
Argument + // allocas are all used in the entry block, so we need approximately as many + // entries as we have arguments. + enum StaticAllocaInfo { Unknown, Clobbered, Elidable }; + SmallDenseMap StaticAllocas; + unsigned NumArgs = FuncInfo->Fn->getArgumentList().size(); + StaticAllocas.reserve(NumArgs * 2); + + auto GetInfoIfStaticAlloca = [&](const Value *V) -> StaticAllocaInfo * { + if (!V) + return nullptr; + V = V->stripPointerCasts(); + const auto *AI = dyn_cast(V); + if (!AI || !AI->isStaticAlloca() || !FuncInfo->StaticAllocaMap.count(AI)) + return nullptr; + auto Iter = StaticAllocas.insert({AI, Unknown}); + return &Iter.first->second; + }; + + // Look for stores of arguments to static allocas. Look through bitcasts and + // GEPs to handle type coercions, as long as the alloca is fully initialized + // by the store. Any non-store use of an alloca escapes it and any subsequent + // unanalyzed store might write it. + // FIXME: Handle structs initialized with multiple stores. + for (const Instruction &I : FuncInfo->Fn->getEntryBlock()) { + // Look for stores, and handle non-store uses conservatively. + const auto *SI = dyn_cast(&I); + if (!SI) { + // We will look through cast uses, so ignore them completely. + if (I.isCast()) + continue; + // Ignore debug info intrinsics, they don't escape or store to allocas. + if (isa(I)) + continue; + // This is an unknown instruction. Assume it escapes or writes to all + // static alloca operands. + for (const Use &U : I.operands()) { + if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(U)) + *Info = StaticAllocaInfo::Clobbered; + } + continue; + } + + // If the stored value is a static alloca, mark it as escaped. + if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(SI->getValueOperand())) + *Info = StaticAllocaInfo::Clobbered; + + // Check if the destination is a static alloca. + const Value *Dst = SI->getPointerOperand()->stripPointerCasts(); + StaticAllocaInfo *Info = GetInfoIfStaticAlloca(Dst); + if (!Info) + continue; + const AllocaInst *AI = cast(Dst); + + // Skip allocas that have been initialized or clobbered. + if (*Info != StaticAllocaInfo::Unknown) + continue; + + // Check if the stored value is an argument, and that this store fully + // initializes the alloca. Don't elide copies from the same argument twice. + const Value *Val = SI->getValueOperand()->stripPointerCasts(); + const auto *Arg = dyn_cast(Val); + if (!Arg || Arg->hasInAllocaAttr() || Arg->hasByValAttr() || + Arg->getType()->isEmptyTy() || + DL.getTypeStoreSize(Arg->getType()) != + DL.getTypeAllocSize(AI->getAllocatedType()) || + ArgCopyElisionCandidates.count(Arg)) { + *Info = StaticAllocaInfo::Clobbered; + continue; + } + + DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI << '\n'); + + // Mark this alloca and store for argument copy elision. + *Info = StaticAllocaInfo::Elidable; + ArgCopyElisionCandidates.insert({Arg, {AI, SI}}); + + // Stop scanning if we've seen all arguments. This will happen early in -O0 + // builds, which is useful, because -O0 builds have large entry blocks and + // many allocas. + if (ArgCopyElisionCandidates.size() == NumArgs) + break; + } +} + +/// Try to elide argument copies from memory into a local alloca. Succeeds if +/// ArgVal is a load from a suitable fixed stack object. 
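As a concrete illustration of what the scan above records (and what the helper documented next then elides), here is a minimal stand-alone sketch; the type and function names are invented for the example and do not appear in the patch. The front end lowers a by-value aggregate argument into a single store of the whole aggregate into a static alloca in the entry block, which is exactly the elidable pattern.

    // Hypothetical source pattern: 'V' arrives in a fixed stack object created
    // by the caller, and the entry block fully initializes 'Local' with one
    // store of 'V'. Argument copy elision lets 'Local' reuse the incoming
    // stack slot instead of allocating a fresh slot and copying into it.
    struct Vec4 { double Lanes[4]; };

    double firstLane(Vec4 V) {
      Vec4 Local = V; // one full-size store of the argument -> elidable
      return Local.Lanes[0];
    }

The implementation of the elision follows.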
+static void tryToElideArgumentCopy(
+    FunctionLoweringInfo *FuncInfo, SmallVectorImpl<SDValue> &Chains,
+    DenseMap<int, int> &ArgCopyElisionFrameIndexMap,
+    SmallPtrSetImpl<const Instruction *> &ElidedArgCopyInstrs,
+    ArgCopyElisionMapTy &ArgCopyElisionCandidates, const Argument &Arg,
+    SDValue ArgVal, bool &ArgHasUses) {
+  // Check if this is a load from a fixed stack object.
+  auto *LNode = dyn_cast<LoadSDNode>(ArgVal);
+  if (!LNode)
+    return;
+  auto *FINode = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode());
+  if (!FINode)
+    return;
+
+  // Check that the fixed stack object is the right size and alignment.
+  // Look at the alignment that the user wrote on the alloca instead of looking
+  // at the stack object.
+  auto ArgCopyIter = ArgCopyElisionCandidates.find(&Arg);
+  assert(ArgCopyIter != ArgCopyElisionCandidates.end());
+  const AllocaInst *AI = ArgCopyIter->second.first;
+  int FixedIndex = FINode->getIndex();
+  int &AllocaIndex = FuncInfo->StaticAllocaMap[AI];
+  int OldIndex = AllocaIndex;
+  MachineFrameInfo &MFI = FuncInfo->MF->getFrameInfo();
+  if (MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)) {
+    DEBUG(dbgs() << "  argument copy elision failed due to bad fixed stack "
+                    "object size\n");
+    return;
+  }
+  unsigned RequiredAlignment = AI->getAlignment();
+  if (!RequiredAlignment) {
+    RequiredAlignment = FuncInfo->MF->getDataLayout().getABITypeAlignment(
+        AI->getAllocatedType());
+  }
+  if (MFI.getObjectAlignment(FixedIndex) < RequiredAlignment) {
+    DEBUG(dbgs() << "  argument copy elision failed: alignment of alloca "
+                    "greater than stack argument alignment ("
+                 << RequiredAlignment << " vs "
+                 << MFI.getObjectAlignment(FixedIndex) << ")\n");
+    return;
+  }
+
+  // Perform the elision. Delete the old stack object and replace its only use
+  // in the variable info map. Mark the stack object as mutable.
+  DEBUG({
+    dbgs() << "Eliding argument copy from " << Arg << " to " << *AI << '\n'
+           << "  Replacing frame index " << OldIndex << " with " << FixedIndex
+           << '\n';
+  });
+  MFI.RemoveStackObject(OldIndex);
+  MFI.setIsImmutableObjectIndex(FixedIndex, false);
+  AllocaIndex = FixedIndex;
+  ArgCopyElisionFrameIndexMap.insert({OldIndex, FixedIndex});
+  Chains.push_back(ArgVal.getValue(1));
+
+  // Avoid emitting code for the store implementing the copy.
+  const StoreInst *SI = ArgCopyIter->second.second;
+  ElidedArgCopyInstrs.insert(SI);
+
+  // Check for uses of the argument again so that we can avoid exporting ArgVal
+  // if it isn't used by anything other than the store.
+  for (const Value *U : Arg.users()) {
+    if (U != SI) {
+      ArgHasUses = true;
+      break;
+    }
+  }
+}
+
 void SelectionDAGISel::LowerArguments(const Function &F) {
   SelectionDAG &DAG = SDB->DAG;
   SDLoc dl = SDB->getCurSDLoc();
@@ -8018,15 +8196,21 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
     Ins.push_back(RetArg);
   }
 
+  // Look for stores of arguments to static allocas. Mark such arguments with a
+  // flag to ask the target to give us the memory location of that argument if
+  // available.
+  ArgCopyElisionMapTy ArgCopyElisionCandidates;
+  findArgumentCopyElisionCandidates(DL, FuncInfo, ArgCopyElisionCandidates);
+
   // Set up the incoming argument description vector.
- unsigned Idx = 1; - for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); - I != E; ++I, ++Idx) { + unsigned Idx = 0; + for (const Argument &Arg : F.args()) { + ++Idx; SmallVector ValueVTs; - ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs); - bool isArgValueUsed = !I->use_empty(); + ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs); + bool isArgValueUsed = !Arg.use_empty(); unsigned PartBase = 0; - Type *FinalType = I->getType(); + Type *FinalType = Arg.getType(); if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) FinalType = cast(FinalType)->getElementType(); bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters( @@ -8042,8 +8226,19 @@ void SelectionDAGISel::LowerArguments(const Function &F) { Flags.setZExt(); if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) Flags.setSExt(); - if (F.getAttributes().hasAttribute(Idx, Attribute::InReg)) + if (F.getAttributes().hasAttribute(Idx, Attribute::InReg)) { + // If we are using vectorcall calling convention, a structure that is + // passed InReg - is surely an HVA + if (F.getCallingConv() == CallingConv::X86_VectorCall && + isa(Arg.getType())) { + // The first value of a structure is marked + if (0 == Value) + Flags.setHvaStart(); + Flags.setHva(); + } + // Set InReg Flag Flags.setInReg(); + } if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet)) Flags.setSRet(); if (F.getAttributes().hasAttribute(Idx, Attribute::SwiftSelf)) @@ -8067,7 +8262,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { Flags.setByVal(); } if (Flags.isByVal() || Flags.isInAlloca()) { - PointerType *Ty = cast(I->getType()); + PointerType *Ty = cast(Arg.getType()); Type *ElementTy = Ty->getElementType(); Flags.setByValSize(DL.getTypeAllocSize(ElementTy)); // For ByVal, alignment should be passed from FE. BE will guess if @@ -8084,6 +8279,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { if (NeedsRegBlock) Flags.setInConsecutiveRegs(); Flags.setOrigAlign(OriginalAlignment); + if (ArgCopyElisionCandidates.count(&Arg)) + Flags.setCopyElisionCandidate(); MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT); unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT); @@ -8130,7 +8327,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // Set up the argument values. unsigned i = 0; - Idx = 1; + Idx = 0; if (!FuncInfo->CanLowerReturn) { // Create a virtual register for the sret pointer, and put in a copy // from the sret argument into it. @@ -8156,25 +8353,39 @@ void SelectionDAGISel::LowerArguments(const Function &F) { ++i; } - for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; - ++I, ++Idx) { + SmallVector Chains; + DenseMap ArgCopyElisionFrameIndexMap; + for (const Argument &Arg : F.args()) { + ++Idx; SmallVector ArgValues; SmallVector ValueVTs; - ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs); + ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) + continue; + + bool ArgHasUses = !Arg.use_empty(); + + // Elide the copying store if the target loaded this argument from a + // suitable fixed stack object. + if (Ins[i].Flags.isCopyElisionCandidate()) { + tryToElideArgumentCopy(FuncInfo, Chains, ArgCopyElisionFrameIndexMap, + ElidedArgCopyInstrs, ArgCopyElisionCandidates, Arg, + InVals[i], ArgHasUses); + } // If this argument is unused then remember its value. It is used to generate // debugging information. 
     bool isSwiftErrorArg =
         TLI->supportSwiftError() &&
         F.getAttributes().hasAttribute(Idx, Attribute::SwiftError);
-    if (I->use_empty() && NumValues && !isSwiftErrorArg) {
-      SDB->setUnusedArgValue(&*I, InVals[i]);
+    if (!ArgHasUses && !isSwiftErrorArg) {
+      SDB->setUnusedArgValue(&Arg, InVals[i]);
 
       // Also remember any frame index for use in FastISel.
       if (FrameIndexSDNode *FI =
              dyn_cast<FrameIndexSDNode>(InVals[i].getNode()))
-        FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex());
+        FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
     }
 
     for (unsigned Val = 0; Val != NumValues; ++Val) {
@@ -8185,16 +8396,15 @@
       // Even an apparent 'unused' swifterror argument needs to be returned. So
       // we do generate a copy for it that can be used on return from the
      // function.
-      if (!I->use_empty() || isSwiftErrorArg) {
+      if (ArgHasUses || isSwiftErrorArg) {
        Optional<ISD::NodeType> AssertOp;
        if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
          AssertOp = ISD::AssertSext;
        else if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt))
          AssertOp = ISD::AssertZext;
 
-        ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i],
-                                             NumParts, PartVT, VT,
-                                             nullptr, AssertOp));
+        ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
+                                             PartVT, VT, nullptr, AssertOp));
       }
 
       i += NumParts;
@@ -8207,18 +8417,18 @@
     // Note down frame index.
     if (FrameIndexSDNode *FI =
            dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
-      FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex());
+      FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
 
     SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues),
                                      SDB->getCurSDLoc());
 
-    SDB->setValue(&*I, Res);
+    SDB->setValue(&Arg, Res);
     if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
       if (LoadSDNode *LNode =
              dyn_cast<LoadSDNode>(Res.getOperand(0).getNode()))
         if (FrameIndexSDNode *FI =
                dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
-          FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex());
+          FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
     }
 
     // Update the SwiftErrorVRegDefMap.
@@ -8238,18 +8448,36 @@
       // uses with vregs.
       unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
       if (TargetRegisterInfo::isVirtualRegister(Reg)) {
-        FuncInfo->ValueMap[&*I] = Reg;
+        FuncInfo->ValueMap[&Arg] = Reg;
         continue;
       }
     }
-    if (!isOnlyUsedInEntryBlock(&*I, TM.Options.EnableFastISel)) {
-      FuncInfo->InitializeRegForValue(&*I);
-      SDB->CopyToExportRegsIfNeeded(&*I);
+    if (!isOnlyUsedInEntryBlock(&Arg, TM.Options.EnableFastISel)) {
+      FuncInfo->InitializeRegForValue(&Arg);
+      SDB->CopyToExportRegsIfNeeded(&Arg);
     }
   }
 
+  if (!Chains.empty()) {
+    Chains.push_back(NewRoot);
+    NewRoot = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+  }
+
+  DAG.setRoot(NewRoot);
+
   assert(i == InVals.size() && "Argument register count mismatch!");
 
+  // If any argument copy elisions occurred and we have debug info, update the
+  // stale frame indices used in the dbg.declare variable info table.
+  MachineFunction::VariableDbgInfoMapTy &DbgDeclareInfo =
+      MF->getVariableDbgInfo();
+  if (!DbgDeclareInfo.empty() && !ArgCopyElisionFrameIndexMap.empty()) {
+    for (MachineFunction::VariableDbgInfo &VI : DbgDeclareInfo) {
+      auto I = ArgCopyElisionFrameIndexMap.find(VI.Slot);
+      if (I != ArgCopyElisionFrameIndexMap.end())
+        VI.Slot = I->second;
+    }
+  }
+
   // Finally, if the target has anything special to do, allow it to do so.
EmitFunctionEntryCode(); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index abde8a8..c6acc09 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -616,33 +616,27 @@ class SelectionDAGBuilder { void init(GCFunctionInfo *gfi, AliasAnalysis &aa, const TargetLibraryInfo *li); - /// clear - Clear out the current SelectionDAG and the associated - /// state and prepare this SelectionDAGBuilder object to be used - /// for a new block. This doesn't clear out information about - /// additional blocks that are needed to complete switch lowering - /// or PHI node updating; that information is cleared out as it is - /// consumed. + /// Clear out the current SelectionDAG and the associated state and prepare + /// this SelectionDAGBuilder object to be used for a new block. This doesn't + /// clear out information about additional blocks that are needed to complete + /// switch lowering or PHI node updating; that information is cleared out as + /// it is consumed. void clear(); - /// clearDanglingDebugInfo - Clear the dangling debug information - /// map. This function is separated from the clear so that debug - /// information that is dangling in a basic block can be properly - /// resolved in a different basic block. This allows the - /// SelectionDAG to resolve dangling debug information attached - /// to PHI nodes. + /// Clear the dangling debug information map. This function is separated from + /// the clear so that debug information that is dangling in a basic block can + /// be properly resolved in a different basic block. This allows the + /// SelectionDAG to resolve dangling debug information attached to PHI nodes. void clearDanglingDebugInfo(); - /// getRoot - Return the current virtual root of the Selection DAG, - /// flushing any PendingLoad items. This must be done before emitting - /// a store or any other node that may need to be ordered after any - /// prior load instructions. - /// + /// Return the current virtual root of the Selection DAG, flushing any + /// PendingLoad items. This must be done before emitting a store or any other + /// node that may need to be ordered after any prior load instructions. SDValue getRoot(); - /// getControlRoot - Similar to getRoot, but instead of flushing all the - /// PendingLoad items, flush all the PendingExports items. It is necessary - /// to do this before emitting a terminator instruction. - /// + /// Similar to getRoot, but instead of flushing all the PendingLoad items, + /// flush all the PendingExports items. It is necessary to do this before + /// emitting a terminator instruction. 
SDValue getControlRoot(); SDLoc getCurSDLoc() const { @@ -688,12 +682,13 @@ class SelectionDAGBuilder { MachineBasicBlock *FBB, MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, Instruction::BinaryOps Opc, BranchProbability TW, - BranchProbability FW); + BranchProbability FW, bool InvertCond); void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, - BranchProbability TW, BranchProbability FW); + BranchProbability TW, BranchProbability FW, + bool InvertCond); bool ShouldEmitAsBranches(const std::vector &Cases); bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB); void CopyToExportRegsIfNeeded(const Value *V); @@ -900,6 +895,7 @@ class SelectionDAGBuilder { void visitInlineAsm(ImmutableCallSite CS); const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic); void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic); + void visitConstrainedFPIntrinsic(const CallInst &I, unsigned Intrinsic); void visitVAStart(const CallInst &I); void visitVAArg(const VAArgInst &I); @@ -944,8 +940,8 @@ class SelectionDAGBuilder { /// Return the appropriate SDDbgValue based on N. SDDbgValue *getDbgValue(SDValue N, DILocalVariable *Variable, - DIExpression *Expr, int64_t Offset, DebugLoc dl, - unsigned DbgSDNodeOrder); + DIExpression *Expr, int64_t Offset, + const DebugLoc &dl, unsigned DbgSDNodeOrder); }; /// RegsForValue - This struct represents the registers (physical or virtual) @@ -958,26 +954,23 @@ class SelectionDAGBuilder { /// type. /// struct RegsForValue { - /// ValueVTs - The value types of the values, which may not be legal, and + /// The value types of the values, which may not be legal, and /// may need be promoted or synthesized from one or more registers. - /// SmallVector ValueVTs; - /// RegVTs - The value types of the registers. This is the same size as - /// ValueVTs and it records, for each value, what the type of the assigned - /// register or registers are. (Individual values are never synthesized - /// from more than one type of register.) + /// The value types of the registers. This is the same size as ValueVTs and it + /// records, for each value, what the type of the assigned register or + /// registers are. (Individual values are never synthesized from more than one + /// type of register.) /// /// With virtual registers, the contents of RegVTs is redundant with TLI's /// getRegisterType member function, however when with physical registers /// it is necessary to have a separate record of the types. - /// SmallVector RegVTs; - /// Regs - This list holds the registers assigned to the values. + /// This list holds the registers assigned to the values. /// Each legal or promoted value requires one register, and each /// expanded value requires multiple registers. - /// SmallVector Regs; RegsForValue(); @@ -987,33 +980,33 @@ struct RegsForValue { RegsForValue(LLVMContext &Context, const TargetLowering &TLI, const DataLayout &DL, unsigned Reg, Type *Ty); - /// append - Add the specified values to this one. + /// Add the specified values to this one. void append(const RegsForValue &RHS) { ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end()); RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end()); Regs.append(RHS.Regs.begin(), RHS.Regs.end()); } - /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from - /// this value and returns the result as a ValueVTs value. 
This uses - /// Chain/Flag as the input and updates them for the output Chain/Flag. - /// If the Flag pointer is NULL, no flag is used. + /// Emit a series of CopyFromReg nodes that copies from this value and returns + /// the result as a ValueVTs value. This uses Chain/Flag as the input and + /// updates them for the output Chain/Flag. If the Flag pointer is NULL, no + /// flag is used. SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, const SDLoc &dl, SDValue &Chain, SDValue *Flag, const Value *V = nullptr) const; - /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the specified - /// value into the registers specified by this object. This uses Chain/Flag - /// as the input and updates them for the output Chain/Flag. If the Flag - /// pointer is nullptr, no flag is used. If V is not nullptr, then it is used - /// in printing better diagnostic messages on error. + /// Emit a series of CopyToReg nodes that copies the specified value into the + /// registers specified by this object. This uses Chain/Flag as the input and + /// updates them for the output Chain/Flag. If the Flag pointer is nullptr, no + /// flag is used. If V is not nullptr, then it is used in printing better + /// diagnostic messages on error. void getCopyToRegs(SDValue Val, SelectionDAG &DAG, const SDLoc &dl, SDValue &Chain, SDValue *Flag, const Value *V = nullptr, ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const; - /// AddInlineAsmOperands - Add this value to the specified inlineasm node - /// operand list. This adds the code marker, matching input operand index - /// (if applicable), and includes the number of values added into it. + /// Add this value to the specified inlineasm node operand list. This adds the + /// code marker, matching input operand index (if applicable), and includes + /// the number of values added into it. void AddInlineAsmOperands(unsigned Kind, bool HasMatching, unsigned MatchingIdx, const SDLoc &dl, SelectionDAG &DAG, std::vector &Ops) const; diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 8e06edc..c97c1b3 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -334,34 +334,35 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // Optimization Methods //===----------------------------------------------------------------------===// -/// Check to see if the specified operand of the specified instruction is a -/// constant integer. If so, check to see if there are any bits set in the -/// constant that are not demanded. If so, shrink the constant and return true. -bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, - const APInt &Demanded) { - SDLoc dl(Op); +/// If the specified instruction has a constant integer operand and there are +/// bits set in that constant that are not demanded, then clear those bits and +/// return true. 
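Outside the DAG, the transform described in this comment reads as a one-liner. A minimal sketch under the stated semantics (plain C++, names invented; the real method works on APInt values and rebuilds the node via CombineTo): if the users of `x op C` only ever observe the bits in Demanded, then for AND, OR, and XOR the constant bits outside that mask can be cleared without changing any observed bit, which often yields a cheaper immediate.

    #include <cstdint>

    // Shrink C when it sets bits that no user of (x op C) ever reads.
    // Example: only the low byte demanded, C = 0x12FF -> shrinks to 0x00FF.
    uint32_t shrinkDemandedConstant(uint32_t C, uint32_t Demanded) {
      if (C & ~Demanded)      // the constant carries unobserved bits
        return C & Demanded;  // clearing them is invisible to all users
      return C;               // already minimal
    }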
+bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant( + SDValue Op, const APInt &Demanded) { + SDLoc DL(Op); + unsigned Opcode = Op.getOpcode(); // FIXME: ISD::SELECT, ISD::SELECT_CC - switch (Op.getOpcode()) { - default: break; + switch (Opcode) { + default: + break; case ISD::XOR: case ISD::AND: case ISD::OR: { - ConstantSDNode *C = dyn_cast(Op.getOperand(1)); - if (!C) return false; + auto *Op1C = dyn_cast(Op.getOperand(1)); + if (!Op1C) + return false; - if (Op.getOpcode() == ISD::XOR && - (C->getAPIntValue() | (~Demanded)).isAllOnesValue()) + // If this is a 'not' op, don't touch it because that's a canonical form. + const APInt &C = Op1C->getAPIntValue(); + if (Opcode == ISD::XOR && (C | ~Demanded).isAllOnesValue()) return false; - // if we can expand it to have all bits set, do it - if (C->getAPIntValue().intersects(~Demanded)) { + if (C.intersects(~Demanded)) { EVT VT = Op.getValueType(); - SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0), - DAG.getConstant(Demanded & - C->getAPIntValue(), - dl, VT)); - return CombineTo(Op, New); + SDValue NewC = DAG.getConstant(Demanded & C, DL, VT); + SDValue NewOp = DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC); + return CombineTo(Op, NewOp); } break; @@ -759,6 +760,29 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownOne &= KnownOne2; KnownZero &= KnownZero2; break; + case ISD::SETCC: { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + ISD::CondCode CC = cast(Op.getOperand(2))->get(); + // If (1) we only need the sign-bit, (2) the setcc operands are the same + // width as the setcc result, and (3) the result of a setcc conforms to 0 or + // -1, we may be able to bypass the setcc. + if (NewMask.isSignBit() && Op0.getScalarValueSizeInBits() == BitWidth && + getBooleanContents(Op.getValueType()) == + BooleanContent::ZeroOrNegativeOneBooleanContent) { + // If we're testing X < 0, then this compare isn't needed - just use X! + // FIXME: We're limiting to integer types here, but this should also work + // if we don't care about FP signed-zero. The use of SETLT with FP means + // that we don't care about NaNs. + if (CC == ISD::SETLT && Op1.getValueType().isInteger() && + (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode()))) + return TLO.CombineTo(Op, Op0); + + // TODO: Should we check for other forms of sign-bit comparisons? + // Examples: X <= -1, X >= 0 + } + break; + } case ISD::SHL: if (ConstantSDNode *SA = dyn_cast(Op.getOperand(1))) { unsigned ShAmt = SA->getZExtValue(); @@ -1084,7 +1108,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarSizeInBits(); APInt InMask = APInt::getLowBitsSet(BitWidth, InBits); - APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits); + APInt InSignBit = APInt::getOneBitSet(BitWidth, InBits - 1); APInt NewBits = ~InMask & NewMask; // If none of the top bits are demanded, convert this into an any_extend. 
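The new ISD::SETCC case in the hunk above rests on a small identity worth spelling out. With zero-or-negative-one boolean content and same-width operands, `setlt X, 0` produces -1 exactly when the sign bit of X is set, so when only the sign bit of the compare result is demanded, X itself already carries the answer and the compare node can be dropped. A hedged scalar model (plain C++, not the DAG form):

    #include <cstdint>

    // Boolean content is 0 / -1, as with vector compares.
    int32_t setccLtZero(int32_t X) { return X < 0 ? -1 : 0; }

    // For a consumer that reads only the sign bit, X is interchangeable:
    // the sign bit of setccLtZero(X) equals the sign bit of X for every X.
    int32_t bypassed(int32_t X) { return X; }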
@@ -2071,6 +2095,16 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (Cond == ISD::SETO || Cond == ISD::SETUO) return DAG.getSetCC(dl, VT, N0, N0, Cond); + // setcc (fneg x), C -> setcc swap(pred) x, -C + if (N0.getOpcode() == ISD::FNEG) { + ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond); + if (DCI.isBeforeLegalizeOps() || + isCondCodeLegal(SwapCond, N0.getSimpleValueType())) { + SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1); + return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond); + } + } + // If the condition is not legal, see if we can find an equivalent one // which is legal. if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) { @@ -2491,10 +2525,7 @@ TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI, std::make_pair(0u, static_cast(nullptr)); // Figure out which register class contains this reg. - for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(), - E = RI->regclass_end(); RCI != E; ++RCI) { - const TargetRegisterClass *RC = *RCI; - + for (const TargetRegisterClass *RC : RI->regclasses()) { // If none of the value types for this register class are valid, we // can't use it. For example, 64-bit reg classes on 32-bit targets. if (!isLegalRC(RC)) @@ -3727,7 +3758,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, return Result; } -SDValue +SDValue TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, EVT DataVT, SelectionDAG &DAG, @@ -3759,6 +3790,49 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask, return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment); } +static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, + SDValue Idx, + EVT VecVT, + const SDLoc &dl) { + if (isa(Idx)) + return Idx; + + EVT IdxVT = Idx.getValueType(); + unsigned NElts = VecVT.getVectorNumElements(); + if (isPowerOf2_32(NElts)) { + APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), + Log2_32(NElts)); + return DAG.getNode(ISD::AND, dl, IdxVT, Idx, + DAG.getConstant(Imm, dl, IdxVT)); + } + + return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, + DAG.getConstant(NElts - 1, dl, IdxVT)); +} + +SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG, + SDValue VecPtr, EVT VecVT, + SDValue Index) const { + SDLoc dl(Index); + // Make sure the index type is big enough to compute in. + Index = DAG.getZExtOrTrunc(Index, dl, getPointerTy(DAG.getDataLayout())); + + EVT EltVT = VecVT.getVectorElementType(); + + // Calculate the element offset and add it to the pointer. + unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size. 
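    // A stand-alone sketch (invented names, not part of the patch) of the
    // clamping strategy that clampDynamicVectorIndex above implements: a
    // dynamic index is forced in bounds with a cheap bitmask when the element
    // count is a power of two, and with an unsigned min against NElts - 1
    // otherwise.
    #include <cstdint>

    uint64_t clampVectorIndex(uint64_t Idx, uint64_t NElts) {
      if ((NElts & (NElts - 1)) == 0)        // power of two
        return Idx & (NElts - 1);            // bitmask wrap, always in range
      return Idx < NElts ? Idx : NElts - 1;  // umin(Idx, NElts - 1)
    }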
+ assert(EltSize * 8 == EltVT.getSizeInBits() && + "Converting bits to bytes lost precision"); + + Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl); + + EVT IdxVT = Index.getValueType(); + + Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index, + DAG.getConstant(EltSize, dl, IdxVT)); + return DAG.getNode(ISD::ADD, dl, IdxVT, Index, VecPtr); +} + //===----------------------------------------------------------------------===// // Implementation of Emulated TLS Model //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 3acc1f9..f9ba30d 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -361,6 +361,11 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { Names[RTLIB::MEMCPY] = "memcpy"; Names[RTLIB::MEMMOVE] = "memmove"; Names[RTLIB::MEMSET] = "memset"; + Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_1] = "__llvm_memcpy_element_atomic_1"; + Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_2] = "__llvm_memcpy_element_atomic_2"; + Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_4] = "__llvm_memcpy_element_atomic_4"; + Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_8] = "__llvm_memcpy_element_atomic_8"; + Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_16] = "__llvm_memcpy_element_atomic_16"; Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume"; Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1"; Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2"; @@ -763,6 +768,24 @@ RTLIB::Libcall RTLIB::getSYNC(unsigned Opc, MVT VT) { return UNKNOWN_LIBCALL; } +RTLIB::Libcall RTLIB::getMEMCPY_ELEMENT_ATOMIC(uint64_t ElementSize) { + switch (ElementSize) { + case 1: + return MEMCPY_ELEMENT_ATOMIC_1; + case 2: + return MEMCPY_ELEMENT_ATOMIC_2; + case 4: + return MEMCPY_ELEMENT_ATOMIC_4; + case 8: + return MEMCPY_ELEMENT_ATOMIC_8; + case 16: + return MEMCPY_ELEMENT_ATOMIC_16; + default: + return UNKNOWN_LIBCALL; + } + +} + /// InitCmpLibcallCCs - Set default comparison libcall CC. /// static void InitCmpLibcallCCs(ISD::CondCode *CCs) { @@ -815,7 +838,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { HasExtractBitsInsn = false; JumpIsExpensive = JumpIsExpensiveOverride; PredictableSelectIsExpensive = false; - MaskAndBranchFoldingIsLegal = false; EnableExtLdPromotion = false; HasFloatingPointExceptions = true; StackPointerRegisterToSaveRestore = 0; @@ -1208,7 +1230,7 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI, // Copy operands before the frame-index. for (unsigned i = 0; i < OperIdx; ++i) - MIB.addOperand(MI->getOperand(i)); + MIB.add(MI->getOperand(i)); // Add frame index operands recognized by stackmaps.cpp if (MFI.isStatepointSpillSlotObjectIndex(FI)) { // indirect-mem-ref tag, size, #FI, offset. @@ -1218,18 +1240,18 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI, assert(MI->getOpcode() == TargetOpcode::STATEPOINT && "sanity"); MIB.addImm(StackMaps::IndirectMemRefOp); MIB.addImm(MFI.getObjectSize(FI)); - MIB.addOperand(MI->getOperand(OperIdx)); + MIB.add(MI->getOperand(OperIdx)); MIB.addImm(0); } else { // direct-mem-ref tag, #FI, offset. // Used by patchpoint, and direct alloca arguments to statepoints MIB.addImm(StackMaps::DirectMemRefOp); - MIB.addOperand(MI->getOperand(OperIdx)); + MIB.add(MI->getOperand(OperIdx)); MIB.addImm(0); } // Copy the operands after the frame index. 
for (unsigned i = OperIdx + 1; i != MI->getNumOperands(); ++i) - MIB.addOperand(MI->getOperand(i)); + MIB.add(MI->getOperand(i)); // Inherit previous memory operands. MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); @@ -1918,11 +1940,7 @@ void TargetLoweringBase::setMaximumJumpTableSize(unsigned Val) { /// override the target defaults. static StringRef getRecipEstimateForFunc(MachineFunction &MF) { const Function *F = MF.getFunction(); - StringRef RecipAttrName = "reciprocal-estimates"; - if (!F->hasFnAttribute(RecipAttrName)) - return StringRef(); - - return F->getFnAttribute(RecipAttrName).getValueAsString(); + return F->getFnAttribute("reciprocal-estimates").getValueAsString(); } /// Construct a string for the given reciprocal operation of the given type. diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp index d129111..39ea37d 100644 --- a/lib/MC/MCAsmInfo.cpp +++ b/lib/MC/MCAsmInfo.cpp @@ -1,4 +1,4 @@ -//===-- MCAsmInfo.cpp - Asm Info -------------------------------------------==// +//===- MCAsmInfo.cpp - Asm Info -------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -16,29 +16,14 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/Support/DataTypes.h" #include "llvm/Support/Dwarf.h" -#include -#include + using namespace llvm; MCAsmInfo::MCAsmInfo() { - PointerSize = 4; - CalleeSaveStackSlotSize = 4; - - IsLittleEndian = true; - StackGrowsUp = false; - HasSubsectionsViaSymbols = false; - HasMachoZeroFillDirective = false; - HasMachoTBSSDirective = false; - MaxInstLength = 4; - MinInstAlignment = 1; - DollarIsPC = false; SeparatorString = ";"; CommentString = "#"; LabelSuffix = ":"; - UseAssignmentForEHBegin = false; - NeedsLocalForSize = false; PrivateGlobalPrefix = "L"; PrivateLabelPrefix = PrivateGlobalPrefix; LinkerPrivateGlobalPrefix = ""; @@ -48,10 +33,6 @@ MCAsmInfo::MCAsmInfo() { Code24Directive = nullptr; Code32Directive = ".code32"; Code64Directive = ".code64"; - AssemblerDialect = 0; - AllowAtInName = false; - SupportsQuotedNames = true; - UseDataRegionDirectives = false; ZeroDirective = "\t.zero\t"; AsciiDirective = "\t.ascii\t"; AscizDirective = "\t.asciz\t"; @@ -60,40 +41,8 @@ MCAsmInfo::MCAsmInfo() { Data24bitsDirective = nullptr; Data32bitsDirective = "\t.long\t"; Data64bitsDirective = "\t.quad\t"; - SunStyleELFSectionSwitchSyntax = false; - UsesELFSectionDirectiveForBSS = false; - AlignmentIsInBytes = true; - TextAlignFillValue = 0; - GPRel64Directive = nullptr; - GPRel32Directive = nullptr; GlobalDirective = "\t.globl\t"; - SetDirectiveSuppressesReloc = false; - HasAggressiveSymbolFolding = true; - COMMDirectiveAlignmentIsInBytes = true; - LCOMMDirectiveAlignmentType = LCOMM::NoAlignment; - HasFunctionAlignment = true; - HasDotTypeDotSizeDirective = true; - HasSingleParameterDotFile = true; - HasIdentDirective = false; - HasNoDeadStrip = false; - HasAltEntry = false; WeakDirective = "\t.weak\t"; - WeakRefDirective = nullptr; - HasWeakDefDirective = false; - HasWeakDefCanBeHiddenDirective = false; - HasLinkOnceDirective = false; - HiddenVisibilityAttr = MCSA_Hidden; - HiddenDeclarationVisibilityAttr = MCSA_Hidden; - ProtectedVisibilityAttr = MCSA_Protected; - SupportsDebugInformation = false; - ExceptionsType = ExceptionHandling::None; - WinEHEncodingType = WinEH::EncodingType::Invalid; - DwarfUsesRelocationsAcrossSections = true; - DwarfFDESymbolsUseAbsDiff = false; - DwarfRegNumForCFI = false; - NeedsDwarfSectionOffsetDirective = false; - 
UseParensForSymbolVariant = false;
-  UseLogicalShr = true;
 
   // FIXME: Clang's logic should be synced with the logic used to initialize
   // this member and the two implementations should be merged.
@@ -109,12 +58,9 @@ MCAsmInfo::MCAsmInfo() {
   //   - The target subclasses for AArch64, ARM, and X86 handle these cases
   UseIntegratedAssembler = false;
   PreserveAsmComments = true;
-
-  CompressDebugSections = DebugCompressionType::DCT_None;
 }
 
-MCAsmInfo::~MCAsmInfo() {
-}
+MCAsmInfo::~MCAsmInfo() = default;
 
 bool MCAsmInfo::isSectionAtomizableBySymbols(const MCSection &Section) const {
   return false;
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index b81fe4b..9799678 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -100,7 +100,7 @@ class MCAsmStreamer final : public MCStreamer {
   /// file if applicable as a QoI issue to make the output of the compiler
   /// more readable. This only affects the MCAsmStreamer, and only when
   /// verbose assembly output is enabled.
-  void AddComment(const Twine &T) override;
+  void AddComment(const Twine &T, bool EOL = true) override;
 
   /// AddEncodingComment - Add a comment showing the encoding of an instruction.
   void AddEncodingComment(const MCInst &Inst, const MCSubtargetInfo &);
@@ -130,7 +130,7 @@ class MCAsmStreamer final : public MCStreamer {
   void ChangeSection(MCSection *Section, const MCExpr *Subsection) override;
 
   void EmitLOHDirective(MCLOHType Kind, const MCLOHArgs &Args) override;
-  void EmitLabel(MCSymbol *Symbol) override;
+  void EmitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
 
   void EmitAssemblerFlag(MCAssemblerFlag Flag) override;
   void EmitLinkerOptions(ArrayRef<std::string> Options) override;
@@ -150,7 +150,7 @@ class MCAsmStreamer final : public MCStreamer {
   void EndCOFFSymbolDef() override;
   void EmitCOFFSafeSEH(MCSymbol const *Symbol) override;
   void EmitCOFFSectionIndex(MCSymbol const *Symbol) override;
-  void EmitCOFFSecRel32(MCSymbol const *Symbol) override;
+  void EmitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) override;
   void emitELFSize(MCSymbol *Symbol, const MCExpr *Value) override;
   void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
                         unsigned ByteAlignment) override;
@@ -301,12 +301,14 @@ class MCAsmStreamer final : public MCStreamer {
 /// file if applicable as a QoI issue to make the output of the compiler
 /// more readable. This only affects the MCAsmStreamer, and only when
 /// verbose assembly output is enabled.
-void MCAsmStreamer::AddComment(const Twine &T) {
+/// By default, EOL is set to true so that each comment goes on its own line.
+void MCAsmStreamer::AddComment(const Twine &T, bool EOL) {
   if (!IsVerboseAsm) return;
 
   T.toVector(CommentToEmit);
-  // Each comment goes on its own line.
-  CommentToEmit.push_back('\n');
+
+  if (EOL)
+    CommentToEmit.push_back('\n'); // Place the comment on a new line.
} void MCAsmStreamer::EmitCommentsAndEOL() { @@ -390,12 +392,13 @@ void MCAsmStreamer::emitExplicitComments() { void MCAsmStreamer::ChangeSection(MCSection *Section, const MCExpr *Subsection) { assert(Section && "Cannot switch to a null section!"); - Section->PrintSwitchToSection(*MAI, OS, Subsection); + Section->PrintSwitchToSection( + *MAI, getContext().getObjectFileInfo()->getTargetTriple(), OS, + Subsection); } -void MCAsmStreamer::EmitLabel(MCSymbol *Symbol) { - assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); - MCStreamer::EmitLabel(Symbol); +void MCAsmStreamer::EmitLabel(MCSymbol *Symbol, SMLoc Loc) { + MCStreamer::EmitLabel(Symbol, Loc); Symbol->print(OS, MAI); OS << MAI->getLabelSuffix(); @@ -616,9 +619,11 @@ void MCAsmStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) { EmitEOL(); } -void MCAsmStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol) { +void MCAsmStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) { OS << "\t.secrel32\t"; Symbol->print(OS, MAI); + if (Offset != 0) + OS << '+' << Offset; EmitEOL(); } diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp index 91d748f..36f0ed3 100644 --- a/lib/MC/MCELFStreamer.cpp +++ b/lib/MC/MCELFStreamer.cpp @@ -11,30 +11,31 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCELFStreamer.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/MC/MCAsmBackend.h" -#include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCFragment.h" #include "llvm/MC/MCObjectFileInfo.h" -#include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCValue.h" -#include "llvm/Support/Debug.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" +#include +#include using namespace llvm; @@ -42,9 +43,6 @@ bool MCELFStreamer::isBundleLocked() const { return getCurrentSectionOnly()->isBundleLocked(); } -MCELFStreamer::~MCELFStreamer() { -} - void MCELFStreamer::mergeFragment(MCDataFragment *DF, MCDataFragment *EF) { MCAssembler &Assembler = getAssembler(); @@ -95,11 +93,9 @@ void MCELFStreamer::InitSections(bool NoExecStack) { SwitchSection(Ctx.getAsmInfo()->getNonexecutableStackSection(Ctx)); } -void MCELFStreamer::EmitLabel(MCSymbol *S) { +void MCELFStreamer::EmitLabel(MCSymbol *S, SMLoc Loc) { auto *Symbol = cast(S); - assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); - - MCObjectStreamer::EmitLabel(Symbol); + MCObjectStreamer::EmitLabel(Symbol, Loc); const MCSectionELF &Section = static_cast(*getCurrentSectionOnly()); @@ -149,16 +145,7 @@ void MCELFStreamer::ChangeSection(MCSection *Section, Asm.registerSymbol(*Grp); this->MCObjectStreamer::ChangeSection(Section, Subsection); - MCContext &Ctx = getContext(); - auto *Begin = cast_or_null(Section->getBeginSymbol()); - if (!Begin) { - Begin = Ctx.getOrCreateSectionSymbol(*SectionELF); - Section->setBeginSymbol(Begin); - } - if 
(Begin->isUndefined()) { - Asm.registerSymbol(*Begin); - Begin->setType(ELF::STT_SECTION); - } + Asm.registerSymbol(*Section->getBeginSymbol()); } void MCELFStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) { @@ -362,13 +349,6 @@ void MCELFStreamer::EmitValueToAlignment(unsigned ByteAlignment, ValueSize, MaxBytesToEmit); } -// Add a symbol for the file name of this module. They start after the -// null symbol and don't count as normal symbol, i.e. a non-STT_FILE symbol -// with the same name may appear. -void MCELFStreamer::EmitFileDirective(StringRef Filename) { - getAssembler().addFileName(Filename); -} - void MCELFStreamer::EmitIdent(StringRef IdentString) { MCSection *Comment = getAssembler().getContext().getELFSection( ".comment", ELF::SHT_PROGBITS, ELF::SHF_MERGE | ELF::SHF_STRINGS, 1, ""); @@ -631,15 +611,6 @@ void MCELFStreamer::FinishImpl() { this->MCObjectStreamer::FinishImpl(); } -MCStreamer *llvm::createELFStreamer(MCContext &Context, MCAsmBackend &MAB, - raw_pwrite_stream &OS, MCCodeEmitter *CE, - bool RelaxAll) { - MCELFStreamer *S = new MCELFStreamer(Context, MAB, OS, CE); - if (RelaxAll) - S->getAssembler().setRelaxAll(true); - return S; -} - void MCELFStreamer::EmitThumbFunc(MCSymbol *Func) { llvm_unreachable("Generic ELF doesn't support this directive"); } @@ -648,22 +619,6 @@ void MCELFStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { llvm_unreachable("ELF doesn't support this directive"); } -void MCELFStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) { - llvm_unreachable("ELF doesn't support this directive"); -} - -void MCELFStreamer::EmitCOFFSymbolStorageClass(int StorageClass) { - llvm_unreachable("ELF doesn't support this directive"); -} - -void MCELFStreamer::EmitCOFFSymbolType(int Type) { - llvm_unreachable("ELF doesn't support this directive"); -} - -void MCELFStreamer::EndCOFFSymbolDef() { - llvm_unreachable("ELF doesn't support this directive"); -} - void MCELFStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) { llvm_unreachable("ELF doesn't support this directive"); @@ -673,3 +628,12 @@ void MCELFStreamer::EmitTBSSSymbol(MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) { llvm_unreachable("ELF doesn't support this directive"); } + +MCStreamer *llvm::createELFStreamer(MCContext &Context, MCAsmBackend &MAB, + raw_pwrite_stream &OS, MCCodeEmitter *CE, + bool RelaxAll) { + MCELFStreamer *S = new MCELFStreamer(Context, MAB, OS, CE); + if (RelaxAll) + S->getAssembler().setRelaxAll(true); + return S; +} diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index cd28b18..3b03dd2 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ b/lib/MC/MCMachOStreamer.cpp @@ -1,4 +1,4 @@ -//===-- MCMachOStreamer.cpp - MachO Streamer ------------------------------===// +//===- MCMachOStreamer.cpp - MachO Streamer -------------------------------===// // // The LLVM Compiler Infrastructure // @@ -7,27 +7,35 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCStreamer.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixup.h" +#include 
"llvm/MC/MCFragment.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCLinkerOptimizationHint.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolMachO.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/Dwarf.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/TargetRegistry.h" +#include +#include using namespace llvm; @@ -70,7 +78,7 @@ class MCMachOStreamer : public MCObjectStreamer { /// @{ void ChangeSection(MCSection *Sect, const MCExpr *Subsect) override; - void EmitLabel(MCSymbol *Symbol) override; + void EmitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override; void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) override; void EmitEHSymAttributes(const MCSymbol *Symbol, MCSymbol *EHSymbol) override; void EmitAssemblerFlag(MCAssemblerFlag Flag) override; @@ -83,18 +91,7 @@ class MCMachOStreamer : public MCObjectStreamer { void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) override; void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) override; - void BeginCOFFSymbolDef(const MCSymbol *Symbol) override { - llvm_unreachable("macho doesn't support this directive"); - } - void EmitCOFFSymbolStorageClass(int StorageClass) override { - llvm_unreachable("macho doesn't support this directive"); - } - void EmitCOFFSymbolType(int Type) override { - llvm_unreachable("macho doesn't support this directive"); - } - void EndCOFFSymbolDef() override { - llvm_unreachable("macho doesn't support this directive"); - } + void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) override; void EmitZerofill(MCSection *Section, MCSymbol *Symbol = nullptr, @@ -102,13 +99,6 @@ class MCMachOStreamer : public MCObjectStreamer { void EmitTBSSSymbol(MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment = 0) override; - void EmitFileDirective(StringRef Filename) override { - // FIXME: Just ignore the .file; it isn't important enough to fail the - // entire assembly. - - // report_fatal_error("unsupported directive: '.file'"); - } - void EmitIdent(StringRef IdentString) override { llvm_unreachable("macho doesn't support this directive"); } @@ -142,7 +132,8 @@ static bool canGoAfterDWARF(const MCSectionMachO &MSec) { if (SegName == "__TEXT" && SecName == "__eh_frame") return true; - if (SegName == "__DATA" && SecName == "__nl_symbol_ptr") + if (SegName == "__DATA" && (SecName == "__nl_symbol_ptr" || + SecName == "__thread_ptr")) return true; return false; @@ -180,15 +171,13 @@ void MCMachOStreamer::EmitEHSymAttributes(const MCSymbol *Symbol, EmitSymbolAttribute(EHSymbol, MCSA_PrivateExtern); } -void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) { - assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); - +void MCMachOStreamer::EmitLabel(MCSymbol *Symbol, SMLoc Loc) { // We have to create a new fragment if this is an atom defining symbol, // fragments cannot span atoms. if (getAssembler().isSymbolLinkerVisible(*Symbol)) insert(new MCDataFragment()); - MCObjectStreamer::EmitLabel(Symbol); + MCObjectStreamer::EmitLabel(Symbol, Loc); // This causes the reference type flag to be cleared. 
Darwin 'as' was "trying" // to clear the weak reference and weak definition bits too, but the diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index da54155..7f83591 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -35,6 +35,7 @@ #include "llvm/MC/MCParser/AsmLexer.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCAsmParserExtension.h" #include "llvm/MC/MCParser/MCAsmParserUtils.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCParser/MCTargetAsmParser.h" @@ -42,6 +43,7 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -55,6 +57,7 @@ #include #include #include +#include #include #include #include @@ -67,7 +70,7 @@ using namespace llvm; -MCAsmParserSemaCallback::~MCAsmParserSemaCallback() {} +MCAsmParserSemaCallback::~MCAsmParserSemaCallback() = default; static cl::opt AsmMacroMaxNestingDepth( "asm-macro-max-nesting-depth", cl::init(20), cl::Hidden, @@ -82,10 +85,10 @@ typedef std::vector MCAsmMacroArguments; struct MCAsmMacroParameter { StringRef Name; MCAsmMacroArgument Value; - bool Required; - bool Vararg; + bool Required = false; + bool Vararg = false; - MCAsmMacroParameter() : Required(false), Vararg(false) {} + MCAsmMacroParameter() = default; }; typedef std::vector MCAsmMacroParameters; @@ -124,23 +127,20 @@ struct ParseStatementInfo { SmallVector, 8> ParsedOperands; /// \brief The opcode from the last parsed instruction. - unsigned Opcode; + unsigned Opcode = ~0U; /// \brief Was there an error parsing the inline assembly? - bool ParseError; + bool ParseError = false; - SmallVectorImpl *AsmRewrites; + SmallVectorImpl *AsmRewrites = nullptr; - ParseStatementInfo() : Opcode(~0U), ParseError(false), AsmRewrites(nullptr) {} + ParseStatementInfo() = default; ParseStatementInfo(SmallVectorImpl *rewrites) - : Opcode(~0), ParseError(false), AsmRewrites(rewrites) {} + : AsmRewrites(rewrites) {} }; /// \brief The concrete assembly parser instance. class AsmParser : public MCAsmParser { - AsmParser(const AsmParser &) = delete; - void operator=(const AsmParser &) = delete; - private: AsmLexer Lexer; MCContext &Ctx; @@ -199,17 +199,19 @@ class AsmParser : public MCAsmParser { unsigned LastQueryLine; /// AssemblerDialect. ~OU means unset value and use value provided by MAI. - unsigned AssemblerDialect; + unsigned AssemblerDialect = ~0U; /// \brief is Darwin compatibility enabled? - bool IsDarwin; + bool IsDarwin = false; /// \brief Are we parsing ms-style inline assembly? 
- bool ParsingInlineAsm; + bool ParsingInlineAsm = false; public: AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out, - const MCAsmInfo &MAI); + const MCAsmInfo &MAI, unsigned CB); + AsmParser(const AsmParser &) = delete; + AsmParser &operator=(const AsmParser &) = delete; ~AsmParser() override; bool Run(bool NoInitialTextSection, bool NoFinalize = false) override; @@ -223,7 +225,6 @@ class AsmParser : public MCAsmParser { DirectiveKindMap[Directive] = DirectiveKindMap[Alias]; } -public: /// @name MCAsmParser Interface /// { @@ -258,7 +259,7 @@ class AsmParser : public MCAsmParser { bool parseMSInlineAsm(void *AsmLoc, std::string &AsmString, unsigned &NumOutputs, unsigned &NumInputs, - SmallVectorImpl > &OpDecls, + SmallVectorImpl> &OpDecls, SmallVectorImpl &Constraints, SmallVectorImpl &Clobbers, const MCInstrInfo *MII, const MCInstPrinter *IP, @@ -572,11 +573,9 @@ extern MCAsmParserExtension *createCOFFAsmParser(); enum { DEFAULT_ADDRSPACE = 0 }; AsmParser::AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out, - const MCAsmInfo &MAI) + const MCAsmInfo &MAI, unsigned CB = 0) : Lexer(MAI), Ctx(Ctx), Out(Out), MAI(MAI), SrcMgr(SM), - PlatformParser(nullptr), CurBuffer(SM.getMainFileID()), - MacrosEnabledFlag(true), CppHashInfo(), AssemblerDialect(~0U), - IsDarwin(false), ParsingInlineAsm(false) { + CurBuffer(CB ? CB : SM.getMainFileID()), MacrosEnabledFlag(true) { HadError = false; // Save the old handler. SavedDiagHandler = SrcMgr.getDiagHandler(); @@ -597,6 +596,9 @@ AsmParser::AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out, case MCObjectFileInfo::IsELF: PlatformParser.reset(createELFAsmParser()); break; + case MCObjectFileInfo::IsWasm: + llvm_unreachable("Wasm parsing not supported yet"); + break; } PlatformParser->Initialize(*this); @@ -608,6 +610,10 @@ AsmParser::AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out, AsmParser::~AsmParser() { assert((HadError || ActiveMacros.empty()) && "Unexpected active macro instantiation!"); + + // Restore the saved diagnostics handler and context for use during + // finalization. + SrcMgr.setDiagHandler(SavedDiagHandler, SavedDiagContext); } void AsmParser::printMacroInstantiations() { @@ -979,7 +985,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; // Lookup the symbol variant if used. - if (Split.second.size()) { + if (!Split.second.empty()) { Variant = MCSymbolRefExpr::getVariantKindForName(Split.second); if (Variant != MCSymbolRefExpr::VK_Invalid) { SymbolName = Split.first; @@ -1005,7 +1011,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { } // Otherwise create a symbol ref. - Res = MCSymbolRefExpr::create(Sym, Variant, getContext()); + Res = MCSymbolRefExpr::create(Sym, Variant, getContext(), FirstTokenLoc); return false; } case AsmToken::BigNum: @@ -1436,6 +1442,7 @@ unsigned AsmParser::getBinOpPrecedence(AsmToken::TokenKind K, /// Res contains the LHS of the expression on input. bool AsmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc) { + SMLoc StartLoc = Lexer.getLoc(); while (true) { MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add; unsigned TokPrec = getBinOpPrecedence(Lexer.getKind(), Kind); @@ -1460,7 +1467,7 @@ bool AsmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res, return true; // Merge LHS and RHS according to operator. 
- Res = MCBinaryExpr::create(Kind, Res, RHS, getContext()); + Res = MCBinaryExpr::create(Kind, Res, RHS, getContext(), StartLoc); } } @@ -1617,7 +1624,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, if (ParsingInlineAsm && SI) { StringRef RewrittenLabel = SI->LookupInlineAsmLabel(IDVal, getSourceManager(), IDLoc, true); - assert(RewrittenLabel.size() && + assert(!RewrittenLabel.empty() && "We should have an internal name here."); Info.AsmRewrites->emplace_back(AOK_Label, IDLoc, IDVal.size(), RewrittenLabel); @@ -1626,12 +1633,6 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, Sym = getContext().getOrCreateSymbol(IDVal); } else Sym = Ctx.createDirectionalLocalSymbol(LocalLabelVal); - - Sym->redefineIfPossible(); - - if (!Sym->isUndefined() || Sym->isVariable()) - return Error(IDLoc, "invalid symbol redefinition"); - // End of Labels should be treated as end of line for lexing // purposes but that information is not available to the Lexer who // does not understand Labels. This may cause us to see a Hash @@ -1650,7 +1651,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, // Emit the label. if (!ParsingInlineAsm) - Out.EmitLabel(Sym); + Out.EmitLabel(Sym, IDLoc); // If we are generating dwarf for assembly source files then gather the // info to make a dwarf label entry for this label if needed. @@ -2025,7 +2026,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, // If we previously parsed a cpp hash file line comment then make sure the // current Dwarf File is for the CppHashFilename if not then emit the // Dwarf File table for it and adjust the line number for the .loc. - if (CppHashInfo.Filename.size()) { + if (!CppHashInfo.Filename.empty()) { unsigned FileNumber = getStreamer().EmitDwarfFileDirective( 0, StringRef(), CppHashInfo.Filename); getContext().setGenDwarfFileNumber(FileNumber); @@ -4191,7 +4192,6 @@ bool AsmParser::parseDirectiveBundleUnlock() { /// parseDirectiveSpace /// ::= (.skip | .space) expression [ , expression ] bool AsmParser::parseDirectiveSpace(StringRef IDVal) { - SMLoc NumBytesLoc = Lexer.getLoc(); const MCExpr *NumBytes; if (checkForValidSection() || parseExpression(NumBytes)) @@ -4287,7 +4287,6 @@ bool AsmParser::parseDirectiveRealDCB(StringRef IDVal, const fltSemantics &Seman /// parseDirectiveDS /// ::= .ds.{b, d, l, p, s, w, x} expression bool AsmParser::parseDirectiveDS(StringRef IDVal, unsigned Size) { - SMLoc NumValuesLoc = Lexer.getLoc(); int64_t NumValues; if (checkForValidSection() || parseAbsoluteExpression(NumValues)) @@ -4416,6 +4415,7 @@ bool AsmParser::parseDirectiveComm(bool IsLocal) { return Error(Pow2AlignmentLoc, "invalid '.comm' or '.lcomm' directive " "alignment, can't be less than zero"); + Sym->redefineIfPossible(); if (!Sym->isUndefined()) return Error(IDLoc, "invalid symbol redefinition"); @@ -5208,7 +5208,7 @@ static int rewritesSort(const AsmRewrite *AsmRewriteA, bool AsmParser::parseMSInlineAsm( void *AsmLoc, std::string &AsmString, unsigned &NumOutputs, - unsigned &NumInputs, SmallVectorImpl > &OpDecls, + unsigned &NumInputs, SmallVectorImpl> &OpDecls, SmallVectorImpl &Constraints, SmallVectorImpl &Clobbers, const MCInstrInfo *MII, const MCInstPrinter *IP, MCAsmParserSemaCallback &SI) { @@ -5518,6 +5518,7 @@ bool parseAssignmentExpression(StringRef Name, bool allow_redef, /// \brief Create an MCAsmParser instance. 
MCAsmParser *llvm::createMCAsmParser(SourceMgr &SM, MCContext &C, - MCStreamer &Out, const MCAsmInfo &MAI) { - return new AsmParser(SM, C, Out, MAI); + MCStreamer &Out, const MCAsmInfo &MAI, + unsigned CB) { + return new AsmParser(SM, C, Out, MAI, CB); } diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp index 5cfa3ae..b9c01c6 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -7,36 +7,44 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCStreamer.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeView.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCObjectFileInfo.h" -#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionCOFF.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCWin64EH.h" +#include "llvm/MC/MCWinEH.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/COFF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include -using namespace llvm; +#include +#include +#include -// Pin the vtables to this file. -MCTargetStreamer::~MCTargetStreamer() {} +using namespace llvm; MCTargetStreamer::MCTargetStreamer(MCStreamer &S) : Streamer(S) { S.setTargetStreamer(this); } +// Pin the vtables to this file. +MCTargetStreamer::~MCTargetStreamer() = default; + void MCTargetStreamer::emitLabel(MCSymbol *Symbol) {} void MCTargetStreamer::finish() {} @@ -125,7 +133,7 @@ void MCStreamer::EmitSymbolValue(const MCSymbol *Sym, unsigned Size, if (!IsSectionRelative) EmitValueImpl(MCSymbolRefExpr::create(Sym, getContext()), Size); else - EmitCOFFSecRel32(Sym); + EmitCOFFSecRel32(Sym, /*Offset=*/0); } void MCStreamer::EmitDTPRel64Value(const MCExpr *Value) { @@ -290,10 +298,17 @@ void MCStreamer::AssignFragment(MCSymbol *Symbol, MCFragment *Fragment) { SymbolOrdering[Symbol] = 1 + SymbolOrdering.size(); } -void MCStreamer::EmitLabel(MCSymbol *Symbol) { +void MCStreamer::EmitLabel(MCSymbol *Symbol, SMLoc Loc) { + Symbol->redefineIfPossible(); + + if (!Symbol->isUndefined() || Symbol->isVariable()) + return getContext().reportError(Loc, "invalid symbol redefinition"); + assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); assert(getCurrentSectionOnly() && "Cannot emit before setting section!"); assert(!Symbol->getFragment() && "Unexpected fragment on symbol data!"); + assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); + Symbol->setFragment(&getCurrentSectionOnly()->getDummyFragment()); MCTargetStreamer *TS = getTargetStreamer(); @@ -666,7 +681,7 @@ void MCStreamer::EmitWinCFISaveXMM(unsigned Register, unsigned Offset) { void MCStreamer::EmitWinCFIPushFrame(bool Code) { EnsureValidWinFrameInfo(); - if (CurrentWinFrameInfo->Instructions.size() > 0) + if (!CurrentWinFrameInfo->Instructions.empty()) report_fatal_error("If present, PushMachFrame must be the first UOP"); MCSymbol *Label = EmitCFILabel(); @@ -689,8 +704,7 @@ void MCStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) { void MCStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) { } -void MCStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol) { -} +void MCStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol, 
uint64_t Offset) {} /// EmitRawText - If this file is backed by an assembly streamer, this dumps /// the specified string in the output .s file. This capability is @@ -793,12 +807,22 @@ void MCStreamer::emitAbsoluteSymbolDiff(const MCSymbol *Hi, const MCSymbol *Lo, void MCStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {} void MCStreamer::EmitThumbFunc(MCSymbol *Func) {} void MCStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {} -void MCStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) {} -void MCStreamer::EndCOFFSymbolDef() {} +void MCStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) { + llvm_unreachable("this directive only supported on COFF targets"); +} +void MCStreamer::EndCOFFSymbolDef() { + llvm_unreachable("this directive only supported on COFF targets"); +} void MCStreamer::EmitFileDirective(StringRef Filename) {} -void MCStreamer::EmitCOFFSymbolStorageClass(int StorageClass) {} -void MCStreamer::EmitCOFFSymbolType(int Type) {} +void MCStreamer::EmitCOFFSymbolStorageClass(int StorageClass) { + llvm_unreachable("this directive only supported on COFF targets"); +} +void MCStreamer::EmitCOFFSymbolType(int Type) { + llvm_unreachable("this directive only supported on COFF targets"); +} void MCStreamer::emitELFSize(MCSymbol *Symbol, const MCExpr *Value) {} +void MCStreamer::emitELFSymverDirective(MCSymbol *Alias, + const MCSymbol *Aliasee) {} void MCStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) {} void MCStreamer::EmitTBSSSymbol(MCSection *Section, MCSymbol *Symbol, diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index d10031b..bf1b74a 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -63,7 +63,7 @@ inline static unsigned getDigit(char cdigit, uint8_t radix) { r = cdigit - 'a'; if (r <= radix - 11U) return r + 10; - + radix = 10; } @@ -81,6 +81,7 @@ void APInt::initSlowCase(uint64_t val, bool isSigned) { if (isSigned && int64_t(val) < 0) for (unsigned i = 1; i < getNumWords(); ++i) pVal[i] = -1ULL; + clearUnusedBits(); } void APInt::initSlowCase(const APInt& that) { @@ -205,7 +206,7 @@ APInt& APInt::operator++() { /// This function subtracts a single "digit" (64-bit word), y, from /// the multi-digit integer array, x[], propagating the borrowed 1 value until -/// no further borrowing is neeeded or it runs out of "digits" in x. The result +/// no further borrowing is needed or it runs out of "digits" in x. The result /// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted. /// In other words, if y > x then this function returns 1, otherwise 0. /// @returns the borrow out of the subtraction @@ -339,7 +340,7 @@ static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) { /// Multiplies integer array x by integer array y and stores the result into /// the integer array dest. Note that dest's size must be >= xlen + ylen. -/// @brief Generalized multiplicate of integer arrays. +/// @brief Generalized multiplication of integer arrays. 
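/// Worked example (illustrative): for two 128-bit values, xlen == ylen == 2
/// and dest must hold 4 words. The first call seeds dest[0..2] with x * y[0]
/// (a 128 x 64 bit product spans up to three words); each later iteration
/// accumulates x * y[i] shifted up by i words, which is ordinary long
/// multiplication in base 2^64.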
static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[], unsigned ylen) { dest[xlen] = mul_1(dest, x, xlen, y[0]); @@ -424,6 +425,18 @@ APInt& APInt::operator&=(const APInt& RHS) { return *this; } +APInt &APInt::operator&=(uint64_t RHS) { + if (isSingleWord()) { + VAL &= RHS; + return *this; + } + pVal[0] &= RHS; + unsigned numWords = getNumWords(); + for (unsigned i = 1; i < numWords; ++i) + pVal[i] = 0; + return *this; +} + APInt& APInt::operator|=(const APInt& RHS) { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); if (isSingleWord()) { @@ -440,41 +453,12 @@ APInt& APInt::operator^=(const APInt& RHS) { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); if (isSingleWord()) { VAL ^= RHS.VAL; - this->clearUnusedBits(); return *this; } unsigned numWords = getNumWords(); for (unsigned i = 0; i < numWords; ++i) pVal[i] ^= RHS.pVal[i]; - return clearUnusedBits(); -} - -APInt APInt::AndSlowCase(const APInt& RHS) const { - unsigned numWords = getNumWords(); - uint64_t* val = getMemory(numWords); - for (unsigned i = 0; i < numWords; ++i) - val[i] = pVal[i] & RHS.pVal[i]; - return APInt(val, getBitWidth()); -} - -APInt APInt::OrSlowCase(const APInt& RHS) const { - unsigned numWords = getNumWords(); - uint64_t *val = getMemory(numWords); - for (unsigned i = 0; i < numWords; ++i) - val[i] = pVal[i] | RHS.pVal[i]; - return APInt(val, getBitWidth()); -} - -APInt APInt::XorSlowCase(const APInt& RHS) const { - unsigned numWords = getNumWords(); - uint64_t *val = getMemory(numWords); - for (unsigned i = 0; i < numWords; ++i) - val[i] = pVal[i] ^ RHS.pVal[i]; - - APInt Result(val, getBitWidth()); - // 0^0==1 so clear the high bits in case they got set. - Result.clearUnusedBits(); - return Result; + return *this; } APInt APInt::operator*(const APInt& RHS) const { @@ -511,11 +495,11 @@ bool APInt::ult(const APInt& RHS) const { if (n1 < n2) return true; - // If magnitude of RHS is greather than LHS, return false. + // If magnitude of RHS is greater than LHS, return false. if (n2 < n1) return false; - // If they bot fit in a word, just compare the low order word + // If they both fit in a word, just compare the low order word if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD) return pVal[0] < RHS.pVal[0]; @@ -545,7 +529,7 @@ bool APInt::slt(const APInt& RHS) const { if (lhsNeg != rhsNeg) return lhsNeg; - // Otherwise we can just use an unsigned comparision, because even negative + // Otherwise we can just use an unsigned comparison, because even negative // numbers compare correctly this way if both have the same signed-ness. return ult(RHS); } @@ -557,6 +541,33 @@ void APInt::setBit(unsigned bitPosition) { pVal[whichWord(bitPosition)] |= maskBit(bitPosition); } +void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) { + unsigned loWord = whichWord(loBit); + unsigned hiWord = whichWord(hiBit); + + // Create an initial mask for the low word with zeros below loBit. + uint64_t loMask = UINT64_MAX << whichBit(loBit); + + // If hiBit is not aligned, we need a high mask. + unsigned hiShiftAmt = whichBit(hiBit); + if (hiShiftAmt != 0) { + // Create a high mask with zeros above hiBit. + uint64_t hiMask = UINT64_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt); + // If loWord and hiWord are equal, then we combine the masks. Otherwise, + // set the bits in hiWord. + if (hiWord == loWord) + loMask &= hiMask; + else + pVal[hiWord] |= hiMask; + } + // Apply the mask to the low word. 
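  // Worked example (illustrative): setBits(5, 75) on a 128-bit APInt yields
  // loWord == 0, hiWord == 1, loMask == UINT64_MAX << 5, and, since
  // whichBit(75) == 11, hiMask == UINT64_MAX >> 53. The words differ, so
  // hiMask was OR'd into pVal[1] above, loMask lands in pVal[0] here, and the
  // loop below fills any whole words in between (none in this case).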
+ pVal[loWord] |= loMask; + + // Fill any words between loWord and hiWord with all ones. + for (unsigned word = loWord + 1; word < hiWord; ++word) + pVal[word] = UINT64_MAX; +} + /// Set the given bit to 0 whose position is given as "bitPosition". /// @brief Set a given bit to 0. void APInt::clearBit(unsigned bitPosition) { @@ -577,9 +588,45 @@ void APInt::flipBit(unsigned bitPosition) { else setBit(bitPosition); } +APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const { + assert(numBits > 0 && "Can't extract zero bits"); + assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth && + "Illegal bit extraction"); + + if (isSingleWord()) + return APInt(numBits, VAL >> bitPosition); + + unsigned loBit = whichBit(bitPosition); + unsigned loWord = whichWord(bitPosition); + unsigned hiWord = whichWord(bitPosition + numBits - 1); + + // Single word result extracting bits from a single word source. + if (loWord == hiWord) + return APInt(numBits, pVal[loWord] >> loBit); + + // Extracting bits that start on a source word boundary can be done + // as a fast memory copy. + if (loBit == 0) + return APInt(numBits, makeArrayRef(pVal + loWord, 1 + hiWord - loWord)); + + // General case - shift + copy source words directly into place. + APInt Result(numBits, 0); + unsigned NumSrcWords = getNumWords(); + unsigned NumDstWords = Result.getNumWords(); + + for (unsigned word = 0; word < NumDstWords; ++word) { + uint64_t w0 = pVal[loWord + word]; + uint64_t w1 = + (loWord + word + 1) < NumSrcWords ? pVal[loWord + word + 1] : 0; + Result.pVal[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit)); + } + + return Result.clearUnusedBits(); +} + unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) { assert(!str.empty() && "Invalid string length"); - assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 || + assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 || radix == 36) && "Radix should be 2, 8, 10, 16, or 36!"); @@ -604,7 +651,7 @@ unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) { return slen * 4 + isNegative; // FIXME: base 36 - + // This is grossly inefficient but accurate. We could probably do something // with a computation of roughly slen*64/20 and then adjust by the value of // the first few digits. But, I'm not sure how accurate that could be. @@ -613,7 +660,7 @@ unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) { // be too large. This avoids the assertion in the constructor. This // calculation doesn't work appropriately for the numbers 0-9, so just use 4 // bits in that case. - unsigned sufficient + unsigned sufficient = radix == 10? (slen == 1 ? 4 : slen * 64/18) : (slen == 1 ? 7 : slen * 16/3); @@ -1244,8 +1291,21 @@ APInt APInt::shlSlowCase(unsigned shiftAmt) const { return Result; } +// Calculate the rotate amount modulo the bit width. +static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) { + unsigned rotBitWidth = rotateAmt.getBitWidth(); + APInt rot = rotateAmt; + if (rotBitWidth < BitWidth) { + // Extend the rotate APInt, so that the urem doesn't divide by 0. + // e.g. APInt(1, 32) would give APInt(1, 0). 
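  // Worked example (illustrative): rotating an i32 by APInt(64, 35) computes
  // 35 urem 32 == 3, so rotl(x, 35) == rotl(x, 3). The previous
  // getLimitedValue(BitWidth) clamped 35 to 32, a full (identity) rotation,
  // which is wrong for any amount strictly between BitWidth and 2*BitWidth.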
+ rot = rotateAmt.zext(BitWidth); + } + rot = rot.urem(APInt(rot.getBitWidth(), BitWidth)); + return rot.getLimitedValue(BitWidth); +} + APInt APInt::rotl(const APInt &rotateAmt) const { - return rotl((unsigned)rotateAmt.getLimitedValue(BitWidth)); + return rotl(rotateModulo(BitWidth, rotateAmt)); } APInt APInt::rotl(unsigned rotateAmt) const { @@ -1256,7 +1316,7 @@ APInt APInt::rotl(unsigned rotateAmt) const { } APInt APInt::rotr(const APInt &rotateAmt) const { - return rotr((unsigned)rotateAmt.getLimitedValue(BitWidth)); + return rotr(rotateModulo(BitWidth, rotateAmt)); } APInt APInt::rotr(unsigned rotateAmt) const { @@ -1618,7 +1678,7 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, if (r) { // The value d is expressed by the "shift" value above since we avoided // multiplication by d by using a shift left. So, all we have to do is - // shift right here. In order to mak + // shift right here. if (shift) { unsigned carry = 0; DEBUG(dbgs() << "KnuthDiv: remainder:"); @@ -2014,7 +2074,7 @@ APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const { APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const { APInt Res = *this * RHS; - + if (*this != 0 && RHS != 0) Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS; else @@ -2041,7 +2101,7 @@ APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const { Overflow = ShAmt.uge(countLeadingZeros()); else Overflow = ShAmt.uge(countLeadingOnes()); - + return *this << ShAmt; } @@ -2061,7 +2121,7 @@ APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const { void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) { // Check our assumptions here assert(!str.empty() && "Invalid string length"); - assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 || + assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 || radix == 36) && "Radix should be 2, 8, 10, 16, or 36!"); @@ -2120,7 +2180,7 @@ void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) { void APInt::toString(SmallVectorImpl &Str, unsigned Radix, bool Signed, bool formatAsCLiteral) const { - assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 || + assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 || Radix == 36) && "Radix should be 2, 8, 10, 16, or 36!"); @@ -2245,14 +2305,15 @@ std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const { return S.str(); } - +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void APInt::dump() const { SmallString<40> S, U; this->toStringUnsigned(U); this->toStringSigned(S); dbgs() << "APInt(" << BitWidth << "b, " - << U << "u " << S << "s)"; + << U << "u " << S << "s)\n"; } +#endif void APInt::print(raw_ostream &OS, bool isSigned) const { SmallString<40> S; diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index f127c40..a03ca04 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -519,6 +519,7 @@ static Triple::ObjectFormatType parseFormat(StringRef EnvironmentName) { .EndsWith("coff", Triple::COFF) .EndsWith("elf", Triple::ELF) .EndsWith("macho", Triple::MachO) + .EndsWith("wasm", Triple::Wasm) .Default(Triple::UnknownObjectFormat); } @@ -559,6 +560,8 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) { case ARM::AK_ARMV7A: case ARM::AK_ARMV7R: return Triple::ARMSubArch_v7; + case ARM::AK_ARMV7VE: + return Triple::ARMSubArch_v7ve; case ARM::AK_ARMV7K: return Triple::ARMSubArch_v7k; case ARM::AK_ARMV7M: @@ -590,6 +593,7 @@ static StringRef 
getObjectFormatTypeName(Triple::ObjectFormatType Kind) { case Triple::COFF: return "coff"; case Triple::ELF: return "elf"; case Triple::MachO: return "macho"; + case Triple::Wasm: return "wasm"; } llvm_unreachable("unknown object format type"); } @@ -1534,6 +1538,7 @@ StringRef Triple::getARMCPUForArch(StringRef MArch) const { return "strongarm"; } case llvm::Triple::NaCl: + case llvm::Triple::OpenBSD: return "cortex-a8"; default: switch (getEnvironment()) { diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 4dff402..8a5b6e3 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -11,28 +11,80 @@ // //===----------------------------------------------------------------------===// -#include "AArch64ISelLowering.h" #include "AArch64CallingConvention.h" #include "AArch64MachineFunctionInfo.h" +#include "AArch64ISelLowering.h" #include "AArch64PerfectShuffle.h" +#include "AArch64RegisterInfo.h" #include "AArch64Subtarget.h" -#include "AArch64TargetMachine.h" -#include "AArch64TargetObjectFile.h" #include "MCTargetDesc/AArch64AddressingModes.h" +#include "Utils/AArch64BaseInfo.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/OperandTraits.h" #include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/Value.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetCallingConv.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + using namespace llvm; #define DEBUG_TYPE "aarch64-lower" @@ -59,7 +111,6 @@ static const MVT MVT_CC = MVT::i32; AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, const AArch64Subtarget &STI) : TargetLowering(TM), Subtarget(&STI) { - // AArch64 doesn't 
have comparisons which set GPRs or setcc instructions, so // we have to make something up. Arbitrarily, choose ZeroOrOne. setBooleanContents(ZeroOrOneBooleanContent); @@ -109,6 +160,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::SETCC, MVT::i64, Custom); setOperationAction(ISD::SETCC, MVT::f32, Custom); setOperationAction(ISD::SETCC, MVT::f64, Custom); + setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); + setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); setOperationAction(ISD::BRCOND, MVT::Other, Expand); setOperationAction(ISD::BR_CC, MVT::i32, Custom); setOperationAction(ISD::BR_CC, MVT::i64, Custom); @@ -218,7 +271,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); - setOperationAction(ISD::CTPOP, MVT::i32, Custom); setOperationAction(ISD::CTPOP, MVT::i64, Custom); @@ -503,8 +555,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setSchedulingPreference(Sched::Hybrid); - // Enable TBZ/TBNZ - MaskAndBranchFoldingIsLegal = true; EnableExtLdPromotion = true; // Set required alignment. @@ -3104,7 +3154,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, } if (VA.isRegLoc()) { - if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i64) { + if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() && + Outs[0].VT == MVT::i64) { assert(VA.getLocVT() == MVT::i64 && "unexpected calling convention register assignment"); assert(!Ins.empty() && Ins[0].VT == MVT::i64 && @@ -3632,6 +3683,7 @@ SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op, llvm_unreachable("Unexpected platform trying to use TLS"); } + SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast(Op.getOperand(1))->get(); @@ -4549,7 +4601,6 @@ SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op, return DAG.getMergeValues(Ops, dl); } - /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two /// i64 values and take a 2 x i64 value to shift plus a shift amount. 
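/// Reference semantics, as a hedged sketch rather than the lowering itself,
/// for 64-bit halves (Lo, Hi) and shift amount Amt:
///   Amt < 64:   Hi' = (Hi << Amt) | (Lo >> (64 - Amt));  Lo' = Lo << Amt
///   Amt >= 64:  Hi' = Lo << (Amt - 64);                   Lo' = 0
/// (Amt == 0 needs care, since a logical shift right by 64 is not valid.)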
SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op, @@ -5074,10 +5125,11 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op, int WindowBase; int WindowScale; - bool operator ==(SDValue OtherVec) { return Vec == OtherVec; } ShuffleSourceInfo(SDValue Vec) - : Vec(Vec), MinElt(UINT_MAX), MaxElt(0), ShuffleVec(Vec), WindowBase(0), - WindowScale(1) {} + : Vec(Vec), MinElt(std::numeric_limits::max()), MaxElt(0), + ShuffleVec(Vec), WindowBase(0), WindowScale(1) {} + + bool operator ==(SDValue OtherVec) { return Vec == OtherVec; } }; // First gather all vectors used as an immediate source for this BUILD_VECTOR @@ -7028,7 +7080,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, return true; } case Intrinsic::aarch64_ldaxp: - case Intrinsic::aarch64_ldxp: { + case Intrinsic::aarch64_ldxp: Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i128; Info.ptrVal = I.getArgOperand(0); @@ -7038,9 +7090,8 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.readMem = true; Info.writeMem = false; return true; - } case Intrinsic::aarch64_stlxp: - case Intrinsic::aarch64_stxp: { + case Intrinsic::aarch64_stxp: Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i128; Info.ptrVal = I.getArgOperand(2); @@ -7050,7 +7101,6 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.readMem = false; Info.writeMem = true; return true; - } default: break; } @@ -7198,6 +7248,13 @@ bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType, return NumBits == 32 || NumBits == 64; } +/// A helper function for determining the number of interleaved accesses we +/// will generate when lowering accesses of the given type. +static unsigned getNumInterleavedAccesses(VectorType *VecTy, + const DataLayout &DL) { + return (DL.getTypeSizeInBits(VecTy) + 127) / 128; +} + /// \brief Lower an interleaved load into a ldN intrinsic. /// /// E.g. Lower an interleaved load (Factor = 2): @@ -7223,10 +7280,14 @@ bool AArch64TargetLowering::lowerInterleavedLoad( VectorType *VecTy = Shuffles[0]->getType(); unsigned VecSize = DL.getTypeSizeInBits(VecTy); - // Skip if we do not have NEON and skip illegal vector types. - if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize != 128)) + // Skip if we do not have NEON and skip illegal vector types. We can + // "legalize" wide vector types into multiple interleaved accesses as long as + // the vector types are divisible by 128. + if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize % 128 != 0)) return false; + unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL); + // A pointer vector can not be the return type of the ldN intrinsics. Need to // load integer vectors first and then convert to pointer vectors. Type *EltTy = VecTy->getVectorElementType(); @@ -7234,6 +7295,25 @@ bool AArch64TargetLowering::lowerInterleavedLoad( VecTy = VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements()); + IRBuilder<> Builder(LI); + + // The base address of the load. + Value *BaseAddr = LI->getPointerOperand(); + + if (NumLoads > 1) { + // If we're going to generate more than one load, reset the sub-vector type + // to something legal. + VecTy = VectorType::get(VecTy->getVectorElementType(), + VecTy->getVectorNumElements() / NumLoads); + + // We will compute the pointer operand of each load from the original base + // address using GEPs. Cast the base address to a pointer to the scalar + // element type. 
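    // Worked example (illustrative): a factor-2 load whose shuffles have type
    // <8 x i64> gives VecSize == 512, so NumLoads == 4 and VecTy is reset to
    // the legal <2 x i64>. The loop below then emits four ld2 calls, stepping
    // BaseAddr by NumElts * Factor == 4 scalar elements per iteration.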
+ BaseAddr = Builder.CreateBitCast( + BaseAddr, VecTy->getVectorElementType()->getPointerTo( + LI->getPointerAddressSpace())); + } + Type *PtrTy = VecTy->getPointerTo(LI->getPointerAddressSpace()); Type *Tys[2] = {VecTy, PtrTy}; static const Intrinsic::ID LoadInts[3] = {Intrinsic::aarch64_neon_ld2, @@ -7242,39 +7322,49 @@ bool AArch64TargetLowering::lowerInterleavedLoad( Function *LdNFunc = Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys); - IRBuilder<> Builder(LI); - Value *Ptr = Builder.CreateBitCast(LI->getPointerOperand(), PtrTy); + // Holds sub-vectors extracted from the load intrinsic return values. The + // sub-vectors are associated with the shufflevector instructions they will + // replace. + DenseMap> SubVecs; - CallInst *LdN = Builder.CreateCall(LdNFunc, Ptr, "ldN"); + for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) { - // Replace uses of each shufflevector with the corresponding vector loaded - // by ldN. - for (unsigned i = 0; i < Shuffles.size(); i++) { - ShuffleVectorInst *SVI = Shuffles[i]; - unsigned Index = Indices[i]; + // If we're generating more than one load, compute the base address of + // subsequent loads as an offset from the previous. + if (LoadCount > 0) + BaseAddr = Builder.CreateConstGEP1_32( + BaseAddr, VecTy->getVectorNumElements() * Factor); - Value *SubVec = Builder.CreateExtractValue(LdN, Index); + CallInst *LdN = Builder.CreateCall( + LdNFunc, Builder.CreateBitCast(BaseAddr, PtrTy), "ldN"); - // Convert the integer vector to pointer vector if the element is pointer. - if (EltTy->isPointerTy()) - SubVec = Builder.CreateIntToPtr(SubVec, SVI->getType()); + // Extract and store the sub-vectors returned by the load intrinsic. + for (unsigned i = 0; i < Shuffles.size(); i++) { + ShuffleVectorInst *SVI = Shuffles[i]; + unsigned Index = Indices[i]; - SVI->replaceAllUsesWith(SubVec); - } + Value *SubVec = Builder.CreateExtractValue(LdN, Index); - return true; -} + // Convert the integer vector to pointer vector if the element is pointer. + if (EltTy->isPointerTy()) + SubVec = Builder.CreateIntToPtr(SubVec, SVI->getType()); -/// \brief Get a mask consisting of sequential integers starting from \p Start. -/// -/// I.e. -static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start, - unsigned NumElts) { - SmallVector Mask; - for (unsigned i = 0; i < NumElts; i++) - Mask.push_back(Builder.getInt32(Start + i)); + SubVecs[SVI].push_back(SubVec); + } + } + + // Replace uses of the shufflevector instructions with the sub-vectors + // returned by the load intrinsic. If a shufflevector instruction is + // associated with more than one sub-vector, those sub-vectors will be + // concatenated into a single wide vector. + for (ShuffleVectorInst *SVI : Shuffles) { + auto &SubVec = SubVecs[SVI]; + auto *WideVec = + SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0]; + SVI->replaceAllUsesWith(WideVec); + } - return ConstantVector::get(Mask); + return true; } /// \brief Lower an interleaved store into a stN intrinsic. @@ -7320,10 +7410,14 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI, const DataLayout &DL = SI->getModule()->getDataLayout(); unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy); - // Skip if we do not have NEON and skip illegal vector types. - if (!Subtarget->hasNEON() || (SubVecSize != 64 && SubVecSize != 128)) + // Skip if we do not have NEON and skip illegal vector types. 
We can + // "legalize" wide vector types into multiple interleaved accesses as long as + // the vector types are divisible by 128. + if (!Subtarget->hasNEON() || (SubVecSize != 64 && SubVecSize % 128 != 0)) return false; + unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL); + Value *Op0 = SVI->getOperand(0); Value *Op1 = SVI->getOperand(1); IRBuilder<> Builder(SI); @@ -7343,6 +7437,25 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI, SubVecTy = VectorType::get(IntTy, LaneLen); } + // The base address of the store. + Value *BaseAddr = SI->getPointerOperand(); + + if (NumStores > 1) { + // If we're going to generate more than one store, reset the lane length + // and sub-vector type to something legal. + LaneLen /= NumStores; + SubVecTy = VectorType::get(SubVecTy->getVectorElementType(), LaneLen); + + // We will compute the pointer operand of each store from the original base + // address using GEPs. Cast the base address to a pointer to the scalar + // element type. + BaseAddr = Builder.CreateBitCast( + BaseAddr, SubVecTy->getVectorElementType()->getPointerTo( + SI->getPointerAddressSpace())); + } + + auto Mask = SVI->getShuffleMask(); + Type *PtrTy = SubVecTy->getPointerTo(SI->getPointerAddressSpace()); Type *Tys[2] = {SubVecTy, PtrTy}; static const Intrinsic::ID StoreInts[3] = {Intrinsic::aarch64_neon_st2, @@ -7351,34 +7464,43 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI, Function *StNFunc = Intrinsic::getDeclaration(SI->getModule(), StoreInts[Factor - 2], Tys); - SmallVector Ops; + for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) { - // Split the shufflevector operands into sub vectors for the new stN call. - auto Mask = SVI->getShuffleMask(); - for (unsigned i = 0; i < Factor; i++) { - if (Mask[i] >= 0) { - Ops.push_back(Builder.CreateShuffleVector( - Op0, Op1, getSequentialMask(Builder, Mask[i], LaneLen))); - } else { - unsigned StartMask = 0; - for (unsigned j = 1; j < LaneLen; j++) { - if (Mask[j*Factor + i] >= 0) { - StartMask = Mask[j*Factor + i] - j; - break; + SmallVector Ops; + + // Split the shufflevector operands into sub vectors for the new stN call. + for (unsigned i = 0; i < Factor; i++) { + unsigned IdxI = StoreCount * LaneLen * Factor + i; + if (Mask[IdxI] >= 0) { + Ops.push_back(Builder.CreateShuffleVector( + Op0, Op1, createSequentialMask(Builder, Mask[IdxI], LaneLen, 0))); + } else { + unsigned StartMask = 0; + for (unsigned j = 1; j < LaneLen; j++) { + unsigned IdxJ = StoreCount * LaneLen * Factor + j; + if (Mask[IdxJ * Factor + IdxI] >= 0) { + StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ; + break; + } } + // Note: Filling undef gaps with random elements is ok, since + // those elements were being written anyway (with undefs). + // In the case of all undefs we're defaulting to using elems from 0 + // Note: StartMask cannot be negative, it's checked in + // isReInterleaveMask + Ops.push_back(Builder.CreateShuffleVector( + Op0, Op1, createSequentialMask(Builder, StartMask, LaneLen, 0))); } - // Note: If all elements in a chunk are undefs, StartMask=0! - // Note: Filling undef gaps with random elements is ok, since - // those elements were being written anyway (with undefs). 
- // In the case of all undefs we're defaulting to using elems from 0 - // Note: StartMask cannot be negative, it's checked in isReInterleaveMask - Ops.push_back(Builder.CreateShuffleVector( - Op0, Op1, getSequentialMask(Builder, StartMask, LaneLen))); } - } - Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), PtrTy)); - Builder.CreateCall(StNFunc, Ops); + // If we generating more than one store, we compute the base address of + // subsequent stores as an offset from the previous. + if (StoreCount > 0) + BaseAddr = Builder.CreateConstGEP1_32(BaseAddr, LaneLen * Factor); + + Ops.push_back(Builder.CreateBitCast(BaseAddr, PtrTy)); + Builder.CreateCall(StNFunc, Ops); + } return true; } @@ -7481,8 +7603,7 @@ bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL, // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2 - return !AM.Scale || AM.Scale == 1 || - (AM.Scale > 0 && (uint64_t)AM.Scale == NumBytes); + return AM.Scale == 1 || (AM.Scale > 0 && (uint64_t)AM.Scale == NumBytes); } int AArch64TargetLowering::getScalingFactorCost(const DataLayout &DL, @@ -8045,13 +8166,13 @@ static SDValue tryCombineToEXTR(SDNode *N, SDValue LHS; uint32_t ShiftLHS = 0; - bool LHSFromHi = 0; + bool LHSFromHi = false; if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi)) return SDValue(); SDValue RHS; uint32_t ShiftRHS = 0; - bool RHSFromHi = 0; + bool RHSFromHi = false; if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi)) return SDValue(); @@ -8885,8 +9006,9 @@ static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St, // instructions (stp). SDLoc DL(&St); SDValue BasePtr = St.getBasePtr(); + const MachinePointerInfo &PtrInfo = St.getPointerInfo(); SDValue NewST1 = - DAG.getStore(St.getChain(), DL, SplatVal, BasePtr, St.getPointerInfo(), + DAG.getStore(St.getChain(), DL, SplatVal, BasePtr, PtrInfo, OrigAlignment, St.getMemOperand()->getFlags()); unsigned Offset = EltOffset; @@ -8895,7 +9017,7 @@ static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St, SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr, DAG.getConstant(Offset, DL, MVT::i64)); NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr, - St.getPointerInfo(), Alignment, + PtrInfo.getWithOffset(Offset), Alignment, St.getMemOperand()->getFlags()); Offset += EltOffset; } @@ -9733,52 +9855,51 @@ static bool isEquivalentMaskless(unsigned CC, unsigned width, switch(CC) { case AArch64CC::LE: - case AArch64CC::GT: { + case AArch64CC::GT: if ((AddConstant == 0) || (CompConstant == MaxUInt - 1 && AddConstant < 0) || (AddConstant >= 0 && CompConstant < 0) || (AddConstant <= 0 && CompConstant <= 0 && CompConstant < AddConstant)) return true; - } break; + break; case AArch64CC::LT: - case AArch64CC::GE: { + case AArch64CC::GE: if ((AddConstant == 0) || (AddConstant >= 0 && CompConstant <= 0) || (AddConstant <= 0 && CompConstant <= 0 && CompConstant <= AddConstant)) return true; - } break; + break; case AArch64CC::HI: - case AArch64CC::LS: { + case AArch64CC::LS: if ((AddConstant >= 0 && CompConstant < 0) || (AddConstant <= 0 && CompConstant >= -1 && CompConstant < AddConstant + MaxUInt)) return true; - } break; + break; case AArch64CC::PL: - case AArch64CC::MI: { + case AArch64CC::MI: if ((AddConstant == 0) || (AddConstant > 0 && CompConstant <= 0) || (AddConstant < 0 && CompConstant <= AddConstant)) return true; - } break; + break; case AArch64CC::LO: - case AArch64CC::HS: { + case AArch64CC::HS: if ((AddConstant >= 0 && CompConstant <= 0) || (AddConstant <= 0 && CompConstant >= 0 
&& CompConstant <= AddConstant + MaxUInt)) return true; - } break; + break; case AArch64CC::EQ: - case AArch64CC::NE: { + case AArch64CC::NE: if ((AddConstant > 0 && CompConstant < 0) || (AddConstant < 0 && CompConstant >= 0 && CompConstant < AddConstant + MaxUInt) || (AddConstant >= 0 && CompConstant >= 0 && CompConstant >= AddConstant) || (AddConstant <= 0 && CompConstant < 0 && CompConstant < AddConstant)) - return true; - } break; + break; case AArch64CC::VS: case AArch64CC::VC: case AArch64CC::AL: @@ -10273,8 +10394,10 @@ bool AArch64TargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base, // All of the indexed addressing mode instructions take a signed // 9 bit immediate offset. if (ConstantSDNode *RHS = dyn_cast(Op->getOperand(1))) { - int64_t RHSC = (int64_t)RHS->getZExtValue(); - if (RHSC >= 256 || RHSC <= -256) + int64_t RHSC = RHS->getSExtValue(); + if (Op->getOpcode() == ISD::SUB) + RHSC = -(uint64_t)RHSC; + if (!isInt<9>(RHSC)) return false; IsInc = (Op->getOpcode() == ISD::ADD); Offset = Op->getOperand(1); @@ -10431,9 +10554,9 @@ void AArch64TargetLowering::ReplaceNodeResults( } bool AArch64TargetLowering::useLoadStackGuardNode() const { - if (!Subtarget->isTargetAndroid()) - return true; - return TargetLowering::useLoadStackGuardNode(); + if (Subtarget->isTargetAndroid() || Subtarget->isTargetFuchsia()) + return TargetLowering::useLoadStackGuardNode(); + return true; } unsigned AArch64TargetLowering::combineRepeatedFPDivisors() const { @@ -10500,7 +10623,7 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, if (ValTy->getPrimitiveSizeInBits() == 128) { Intrinsic::ID Int = IsAcquire ? Intrinsic::aarch64_ldaxp : Intrinsic::aarch64_ldxp; - Function *Ldxr = llvm::Intrinsic::getDeclaration(M, Int); + Function *Ldxr = Intrinsic::getDeclaration(M, Int); Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); Value *LoHi = Builder.CreateCall(Ldxr, Addr, "lohi"); @@ -10516,7 +10639,7 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, Type *Tys[] = { Addr->getType() }; Intrinsic::ID Int = IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr; - Function *Ldxr = llvm::Intrinsic::getDeclaration(M, Int, Tys); + Function *Ldxr = Intrinsic::getDeclaration(M, Int, Tys); return Builder.CreateTruncOrBitCast( Builder.CreateCall(Ldxr, Addr), @@ -10526,8 +10649,7 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance( IRBuilder<> &Builder) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); - Builder.CreateCall( - llvm::Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex)); + Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex)); } Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder, @@ -10572,36 +10694,56 @@ bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &, return false; } -Value *AArch64TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const { - if (!Subtarget->isTargetAndroid()) - return TargetLowering::getIRStackGuard(IRB); - - // Android provides a fixed TLS slot for the stack cookie. 
See the definition - // of TLS_SLOT_STACK_GUARD in - // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h - const unsigned TlsOffset = 0x28; +static Value *UseTlsOffset(IRBuilder<> &IRB, unsigned Offset) { Module *M = IRB.GetInsertBlock()->getParent()->getParent(); Function *ThreadPointerFunc = Intrinsic::getDeclaration(M, Intrinsic::thread_pointer); return IRB.CreatePointerCast( - IRB.CreateConstGEP1_32(IRB.CreateCall(ThreadPointerFunc), TlsOffset), + IRB.CreateConstGEP1_32(IRB.CreateCall(ThreadPointerFunc), Offset), Type::getInt8PtrTy(IRB.getContext())->getPointerTo(0)); } -Value *AArch64TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const { - if (!Subtarget->isTargetAndroid()) - return TargetLowering::getSafeStackPointerLocation(IRB); +Value *AArch64TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const { + // Android provides a fixed TLS slot for the stack cookie. See the definition + // of TLS_SLOT_STACK_GUARD in + // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h + if (Subtarget->isTargetAndroid()) + return UseTlsOffset(IRB, 0x28); + + // Fuchsia is similar. + // defines MX_TLS_STACK_GUARD_OFFSET with this value. + if (Subtarget->isTargetFuchsia()) + return UseTlsOffset(IRB, -0x10); + return TargetLowering::getIRStackGuard(IRB); +} + +Value *AArch64TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const { // Android provides a fixed TLS slot for the SafeStack pointer. See the // definition of TLS_SLOT_SAFESTACK in // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h - const unsigned TlsOffset = 0x48; - Module *M = IRB.GetInsertBlock()->getParent()->getParent(); - Function *ThreadPointerFunc = - Intrinsic::getDeclaration(M, Intrinsic::thread_pointer); - return IRB.CreatePointerCast( - IRB.CreateConstGEP1_32(IRB.CreateCall(ThreadPointerFunc), TlsOffset), - Type::getInt8PtrTy(IRB.getContext())->getPointerTo(0)); + if (Subtarget->isTargetAndroid()) + return UseTlsOffset(IRB, 0x48); + + // Fuchsia is similar. + // defines MX_TLS_UNSAFE_SP_OFFSET with this value. + if (Subtarget->isTargetFuchsia()) + return UseTlsOffset(IRB, -0x8); + + return TargetLowering::getSafeStackPointerLocation(IRB); +} + +bool AArch64TargetLowering::isMaskAndCmp0FoldingBeneficial( + const Instruction &AndI) const { + // Only sink 'and' mask to cmp use block if it is masking a single bit, since + // this is likely to be fold the and/cmp/br into a single tbz instruction. It + // may be beneficial to sink in other cases, but we would have to check that + // the cmp would not get folded into the br to form a cbz for these to be + // beneficial. 
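  // Illustrative outcome (assumed codegen, not asserted by this patch): for
  //   if ((x & 8) == 0) { ... }
  // sinking the single-bit mask next to its compare lets ISel emit
  //   tbz w0, #3, <target>
  // rather than materializing the AND and branching with cbz.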
+ ConstantInt* Mask = dyn_cast(AndI.getOperand(1)); + if (!Mask) + return false; + return Mask->getUniqueInteger().isPowerOf2(); } void AArch64TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const { @@ -10663,3 +10805,11 @@ bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeSet Attr) const { Attr.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); return OptSize && !VT.isVector(); } + +unsigned +AArch64TargetLowering::getVaListSizeInBits(const DataLayout &DL) const { + if (Subtarget->isTargetDarwin()) + return getPointerTy(DL).getSizeInBits(); + + return 3 * getPointerTy(DL).getSizeInBits() + 2 * 32; +} diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 7912789..a8bd4e3 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -15,6 +15,7 @@ #include "AMDGPUISelLowering.h" #include "AMDGPU.h" +#include "AMDGPUCallLowering.h" #include "AMDGPUFrameLowering.h" #include "AMDGPUIntrinsicInfo.h" #include "AMDGPURegisterInfo.h" @@ -172,16 +173,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setOperationAction(ISD::STORE, MVT::v2f64, Promote); AddPromotedToType(ISD::STORE, MVT::v2f64, MVT::v4i32); - setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom); - setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom); - - setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom); - setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand); - - setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand); - setTruncStoreAction(MVT::v16i32, MVT::v16i8, Expand); - setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand); - setTruncStoreAction(MVT::i64, MVT::i1, Expand); setTruncStoreAction(MVT::i64, MVT::i8, Expand); setTruncStoreAction(MVT::i64, MVT::i16, Expand); @@ -444,6 +435,13 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setSchedulingPreference(Sched::RegPressure); setJumpIsExpensive(true); + + // FIXME: This is only partially true. If we have to do vector compares, any + // SGPR pair can be a condition register. If we have a uniform condition, we + // are better off doing SALU operations, where there is only one SCC. For now, + // we don't have a way of knowing during instruction selection if a condition + // will be uniform and we always use vector compares. Assume we are using + // vector compares until that is fixed. setHasMultipleConditionRegisters(true); // SI at least has hardware support for floating point exceptions, but no way @@ -463,10 +461,11 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, // N > 4 stores on the same chain. GatherAllAliasesMaxDepth = 16; - // FIXME: Need to really handle these. - MaxStoresPerMemcpy = 4096; - MaxStoresPerMemmove = 4096; - MaxStoresPerMemset = 4096; + // memcpy/memmove/memset are expanded in the IR, so we shouldn't need to worry + // about these during lowering. 
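  // One reading of these thresholds (hedged): since the IR-level expansion is
  // expected to run first, any memcpy/memmove/memset that still reaches the
  // DAG is expanded inline into load/store sequences regardless of size,
  // rather than becoming a libcall the target could not emit anyway.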
+ MaxStoresPerMemcpy = 0xffffffff; + MaxStoresPerMemmove = 0xffffffff; + MaxStoresPerMemset = 0xffffffff; setTargetDAGCombine(ISD::BITCAST); setTargetDAGCombine(ISD::SHL); @@ -480,12 +479,93 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::FADD); setTargetDAGCombine(ISD::FSUB); + setTargetDAGCombine(ISD::FNEG); + setTargetDAGCombine(ISD::FABS); } //===----------------------------------------------------------------------===// // Target Information //===----------------------------------------------------------------------===// +LLVM_READNONE +static bool fnegFoldsIntoOp(unsigned Opc) { + switch (Opc) { + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FMA: + case ISD::FMAD: + case ISD::FMINNUM: + case ISD::FMAXNUM: + case ISD::FSIN: + case ISD::FTRUNC: + case ISD::FRINT: + case ISD::FNEARBYINT: + case AMDGPUISD::RCP: + case AMDGPUISD::RCP_LEGACY: + case AMDGPUISD::SIN_HW: + case AMDGPUISD::FMUL_LEGACY: + case AMDGPUISD::FMIN_LEGACY: + case AMDGPUISD::FMAX_LEGACY: + return true; + default: + return false; + } +} + +/// \p returns true if the operation will definitely need to use a 64-bit +/// encoding, and thus will use a VOP3 encoding regardless of the source +/// modifiers. +LLVM_READONLY +static bool opMustUseVOP3Encoding(const SDNode *N, MVT VT) { + return N->getNumOperands() > 2 || VT == MVT::f64; +} + +// Most FP instructions support source modifiers, but this could be refined +// slightly. +LLVM_READONLY +static bool hasSourceMods(const SDNode *N) { + if (isa(N)) + return false; + + switch (N->getOpcode()) { + case ISD::CopyToReg: + case ISD::SELECT: + case ISD::FDIV: + case ISD::FREM: + case ISD::INLINEASM: + case AMDGPUISD::INTERP_P1: + case AMDGPUISD::INTERP_P2: + case AMDGPUISD::DIV_SCALE: + return false; + default: + return true; + } +} + +static bool allUsesHaveSourceMods(const SDNode *N, unsigned CostThreshold = 4) { + // Some users (such as 3-operand FMA/MAD) must use a VOP3 encoding, and thus + // it is truly free to use a source modifier in all cases. If there are + // multiple users but for each one will necessitate using VOP3, there will be + // a code size increase. Try to avoid increasing code size unless we know it + // will save on the instruction count. + unsigned NumMayIncreaseSize = 0; + MVT VT = N->getValueType(0).getScalarType().getSimpleVT(); + + // XXX - Should this limit number of uses to check? + for (const SDNode *U : N->uses()) { + if (!hasSourceMods(U)) + return false; + + if (!opMustUseVOP3Encoding(U, VT)) { + if (++NumMayIncreaseSize > CostThreshold) + return false; + } + } + + return true; +} + MVT AMDGPUTargetLowering::getVectorIdxTy(const DataLayout &) const { return MVT::i32; } @@ -498,7 +578,8 @@ bool AMDGPUTargetLowering::isSelectSupported(SelectSupportKind SelType) const { // FIXME: Why are we reporting vectors of FP immediates as legal? bool AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { EVT ScalarVT = VT.getScalarType(); - return (ScalarVT == MVT::f32 || ScalarVT == MVT::f64); + return (ScalarVT == MVT::f32 || ScalarVT == MVT::f64 || + (ScalarVT == MVT::f16 && Subtarget->has16BitInsts())); } // We don't want to shrink f64 / f32 constants. 
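// Aside on the isFPImmLegal change above (hedged): with 16-bit instructions
// available, common f16 immediates can be encoded directly as inline
// constants, so reporting them legal avoids materializing them through a
// constant pool or a register move.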
@@ -563,12 +644,17 @@ bool AMDGPUTargetLowering::isCheapToSpeculateCtlz() const { bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const { assert(VT.isFloatingPoint()); - return VT == MVT::f32 || VT == MVT::f64 || (Subtarget->has16BitInsts() && - VT == MVT::f16); + + // Packed operations do not have a fabs modifier. + return VT == MVT::f32 || VT == MVT::f64 || + (Subtarget->has16BitInsts() && VT == MVT::f16); } bool AMDGPUTargetLowering::isFNegFree(EVT VT) const { - return isFAbsFree(VT); + assert(VT.isFloatingPoint()); + return VT == MVT::f32 || VT == MVT::f64 || + (Subtarget->has16BitInsts() && VT == MVT::f16) || + (Subtarget->hasVOP3PInsts() && VT == MVT::v2f16); } bool AMDGPUTargetLowering:: storeOfVectorConstantIsCheap(EVT MemVT, @@ -650,6 +736,11 @@ bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const { // TargetLowering Callbacks //===---------------------------------------------------------------------===// +CCAssignFn *AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC, + bool IsVarArg) const { + return CC_AMDGPU; +} + /// The SelectionDAGBuilder will automatically promote function arguments /// with illegal types. However, this does not work for the AMDGPU targets /// since the function arguments are stored in memory as these illegal types. @@ -789,8 +880,10 @@ SDValue AMDGPUTargetLowering::LowerCall(CallLoweringInfo &CLI, Fn, "unsupported call to function " + FuncName, CLI.DL.getDebugLoc()); DAG.getContext()->diagnose(NoCalls); - for (unsigned I = 0, E = CLI.Ins.size(); I != E; ++I) - InVals.push_back(DAG.getUNDEF(CLI.Ins[I].VT)); + if (!CLI.IsTailCall) { + for (unsigned I = 0, E = CLI.Ins.size(); I != E; ++I) + InVals.push_back(DAG.getUNDEF(CLI.Ins[I].VT)); + } return DAG.getEntryNode(); } @@ -810,7 +903,7 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: - Op->dump(&DAG); + Op->print(errs(), &DAG); llvm_unreachable("Custom lowering code for this" "instruction is not implemented yet!"); break; @@ -924,34 +1017,27 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, EVT VT = Op.getValueType(); switch (IntrinsicID) { - default: return Op; - case AMDGPUIntrinsic::AMDGPU_clamp: // Legacy name. 
- return DAG.getNode(AMDGPUISD::CLAMP, DL, VT, - Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); - - case AMDGPUIntrinsic::AMDGPU_bfe_i32: - return DAG.getNode(AMDGPUISD::BFE_I32, DL, VT, - Op.getOperand(1), - Op.getOperand(2), - Op.getOperand(3)); + default: return Op; + case AMDGPUIntrinsic::AMDGPU_bfe_i32: + return DAG.getNode(AMDGPUISD::BFE_I32, DL, VT, + Op.getOperand(1), + Op.getOperand(2), + Op.getOperand(3)); - case AMDGPUIntrinsic::AMDGPU_bfe_u32: - return DAG.getNode(AMDGPUISD::BFE_U32, DL, VT, - Op.getOperand(1), - Op.getOperand(2), - Op.getOperand(3)); + case AMDGPUIntrinsic::AMDGPU_bfe_u32: + return DAG.getNode(AMDGPUISD::BFE_U32, DL, VT, + Op.getOperand(1), + Op.getOperand(2), + Op.getOperand(3)); } } /// \brief Generate Min/Max node -SDValue AMDGPUTargetLowering::CombineFMinMaxLegacy(const SDLoc &DL, EVT VT, +SDValue AMDGPUTargetLowering::combineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, SDValue CC, DAGCombinerInfo &DCI) const { - if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) - return SDValue(); - if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) return SDValue(); @@ -1209,7 +1295,10 @@ SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG, SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FltVT, fq); // float fr = mad(fqneg, fb, fa); - SDValue fr = DAG.getNode(ISD::FMAD, DL, FltVT, fqneg, fb, fa); + unsigned OpCode = Subtarget->hasFP32Denormals() ? + (unsigned)AMDGPUISD::FMAD_FTZ : + (unsigned)ISD::FMAD; + SDValue fr = DAG.getNode(OpCode, DL, FltVT, fqneg, fb, fa); // int iq = (int)fq; SDValue iq = DAG.getNode(ToInt, DL, IntVT, fq); @@ -2360,6 +2449,28 @@ SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N, SN->getBasePtr(), SN->getMemOperand()); } +SDValue AMDGPUTargetLowering::performClampCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + ConstantFPSDNode *CSrc = dyn_cast(N->getOperand(0)); + if (!CSrc) + return SDValue(); + + const APFloat &F = CSrc->getValueAPF(); + APFloat Zero = APFloat::getZero(F.getSemantics()); + APFloat::cmpResult Cmp0 = F.compare(Zero); + if (Cmp0 == APFloat::cmpLessThan || + (Cmp0 == APFloat::cmpUnordered && Subtarget->enableDX10Clamp())) { + return DCI.DAG.getConstantFP(Zero, SDLoc(N), N->getValueType(0)); + } + + APFloat One(F.getSemantics(), "1.0"); + APFloat::cmpResult Cmp1 = F.compare(One); + if (Cmp1 == APFloat::cmpGreaterThan) + return DCI.DAG.getConstantFP(One, SDLoc(N), N->getValueType(0)); + + return SDValue(CSrc, 0); +} + /// Split the 64-bit value \p LHS into two 32-bit components, and perform the /// binary operation \p Opc to it with the corresponding constant operands. SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl( @@ -2686,8 +2797,93 @@ SDValue AMDGPUTargetLowering::performCtlzCombine(const SDLoc &SL, SDValue Cond, return SDValue(); } +static SDValue distributeOpThroughSelect(TargetLowering::DAGCombinerInfo &DCI, + unsigned Op, + const SDLoc &SL, + SDValue Cond, + SDValue N1, + SDValue N2) { + SelectionDAG &DAG = DCI.DAG; + EVT VT = N1.getValueType(); + + SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT, Cond, + N1.getOperand(0), N2.getOperand(0)); + DCI.AddToWorklist(NewSelect.getNode()); + return DAG.getNode(Op, SL, VT, NewSelect); +} + +// Pull a free FP operation out of a select so it may fold into uses. 
+//
+// select c, (fneg x), (fneg y) -> fneg (select c, x, y)
+// select c, (fneg x), k -> fneg (select c, x, (fneg k))
+//
+// select c, (fabs x), (fabs y) -> fabs (select c, x, y)
+// select c, (fabs x), +k -> fabs (select c, x, k)
+static SDValue foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
+                                    SDValue N) {
+  SelectionDAG &DAG = DCI.DAG;
+  SDValue Cond = N.getOperand(0);
+  SDValue LHS = N.getOperand(1);
+  SDValue RHS = N.getOperand(2);
+
+  EVT VT = N.getValueType();
+  if ((LHS.getOpcode() == ISD::FABS && RHS.getOpcode() == ISD::FABS) ||
+      (LHS.getOpcode() == ISD::FNEG && RHS.getOpcode() == ISD::FNEG)) {
+    return distributeOpThroughSelect(DCI, LHS.getOpcode(),
+                                     SDLoc(N), Cond, LHS, RHS);
+  }
+
+  bool Inv = false;
+  if (RHS.getOpcode() == ISD::FABS || RHS.getOpcode() == ISD::FNEG) {
+    std::swap(LHS, RHS);
+    Inv = true;
+  }
+
+  // TODO: Support vector constants.
+  ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS);
+  if ((LHS.getOpcode() == ISD::FNEG || LHS.getOpcode() == ISD::FABS) && CRHS) {
+    SDLoc SL(N);
+    // If one side is an fneg/fabs and the other is a constant, we can push the
+    // fneg/fabs down. If it's an fabs, the constant needs to be non-negative.
+    SDValue NewLHS = LHS.getOperand(0);
+    SDValue NewRHS = RHS;
+
+    // Careful: if the neg can be folded up, don't try to pull it back down.
+    bool ShouldFoldNeg = true;
+
+    if (NewLHS.hasOneUse()) {
+      unsigned Opc = NewLHS.getOpcode();
+      if (LHS.getOpcode() == ISD::FNEG && fnegFoldsIntoOp(Opc))
+        ShouldFoldNeg = false;
+      if (LHS.getOpcode() == ISD::FABS && Opc == ISD::FMUL)
+        ShouldFoldNeg = false;
+    }
+
+    if (ShouldFoldNeg) {
+      if (LHS.getOpcode() == ISD::FNEG)
+        NewRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
+      else if (CRHS->isNegative())
+        return SDValue();
+
+      if (Inv)
+        std::swap(NewLHS, NewRHS);
+
+      SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT,
+                                      Cond, NewLHS, NewRHS);
+      DCI.AddToWorklist(NewSelect.getNode());
+      return DAG.getNode(LHS.getOpcode(), SL, VT, NewSelect);
+    }
+  }
+
+  return SDValue();
+}
+
+
 SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
                                                    DAGCombinerInfo &DCI) const {
+  if (SDValue Folded = foldFreeOpFromSelect(DCI, SDValue(N, 0)))
+    return Folded;
+
   SDValue Cond = N->getOperand(0);
   if (Cond.getOpcode() != ISD::SETCC)
     return SDValue();
 
@@ -2700,18 +2896,262 @@ SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
   SDValue True = N->getOperand(1);
   SDValue False = N->getOperand(2);
 
-  if (VT == MVT::f32 && Cond.hasOneUse()) {
-    SDValue MinMax
-      = CombineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI);
-    // Revisit this node so we can catch min3/max3/med3 patterns.
-    //DCI.AddToWorklist(MinMax.getNode());
-    return MinMax;
+  if (Cond.hasOneUse()) { // TODO: Look for multiple select uses.
+    SelectionDAG &DAG = DCI.DAG;
+    if (DAG.isConstantValueOfAnyType(True) &&
+        !DAG.isConstantValueOfAnyType(False)) {
+      // Swap cmp + select pair to move constant to false input.
+      // This will allow using VOPC cndmasks more often.
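
// The swap below leans on the identity select(c, t, f) == select(!c, f, t);
// moving the constant into the false operand lets it be encoded directly in
// a VOPC cndmask. A minimal host-side check of the identity (illustrative
// C++, not DAG code; sel/K are stand-in names):

#include <cassert>

static int sel(bool C, int T, int F) { return C ? T : F; }

int main() {
  for (int X = -2; X <= 2; ++X) {
    const int K = 7; // constant true-operand, as in the combine below
    bool C = X > 0;
    assert(sel(C, K, X) == sel(!C, X, K)); // swapped arms, inverted condition
  }
}
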
+      // select (setcc x, y, cc), k, x -> select (setcc x, y, !cc), x, k
+
+      SDLoc SL(N);
+      ISD::CondCode NewCC = getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
+                                            LHS.getValueType().isInteger());
+
+      SDValue NewCond = DAG.getSetCC(SL, Cond.getValueType(), LHS, RHS, NewCC);
+      return DAG.getNode(ISD::SELECT, SL, VT, NewCond, False, True);
+    }
+
+    if (VT == MVT::f32 && Subtarget->hasFminFmaxLegacy()) {
+      SDValue MinMax
+        = combineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI);
+      // Revisit this node so we can catch min3/max3/med3 patterns.
+      //DCI.AddToWorklist(MinMax.getNode());
+      return MinMax;
+    }
+  }
 
   // There's no reason to not do this if the condition has other uses.
   return performCtlzCombine(SDLoc(N), Cond, True, False, DCI);
 }
 
+static bool isConstantFPZero(SDValue N) {
+  if (const ConstantFPSDNode *C = isConstOrConstSplatFP(N))
+    return C->isZero() && !C->isNegative();
+  return false;
+}
+
+static unsigned inverseMinMax(unsigned Opc) {
+  switch (Opc) {
+  case ISD::FMAXNUM:
+    return ISD::FMINNUM;
+  case ISD::FMINNUM:
+    return ISD::FMAXNUM;
+  case AMDGPUISD::FMAX_LEGACY:
+    return AMDGPUISD::FMIN_LEGACY;
+  case AMDGPUISD::FMIN_LEGACY:
+    return AMDGPUISD::FMAX_LEGACY;
+  default:
+    llvm_unreachable("invalid min/max opcode");
+  }
+}
+
+SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
+                                                 DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+
+  unsigned Opc = N0.getOpcode();
+
+  // If the input has multiple uses and we can either fold the negate down, or
+  // the other uses cannot, give up. This both prevents unprofitable
+  // transformations and infinite loops: we won't repeatedly try to fold around
+  // a negate that has no 'good' form.
+  if (N0.hasOneUse()) {
+    // This may be able to fold into the source, but at a code size cost. Don't
+    // fold if the fold into the user is free.
+ if (allUsesHaveSourceMods(N, 0)) + return SDValue(); + } else { + if (fnegFoldsIntoOp(Opc) && + (allUsesHaveSourceMods(N) || !allUsesHaveSourceMods(N0.getNode()))) + return SDValue(); + } + + SDLoc SL(N); + switch (Opc) { + case ISD::FADD: { + if (!mayIgnoreSignedZero(N0)) + return SDValue(); + + // (fneg (fadd x, y)) -> (fadd (fneg x), (fneg y)) + SDValue LHS = N0.getOperand(0); + SDValue RHS = N0.getOperand(1); + + if (LHS.getOpcode() != ISD::FNEG) + LHS = DAG.getNode(ISD::FNEG, SL, VT, LHS); + else + LHS = LHS.getOperand(0); + + if (RHS.getOpcode() != ISD::FNEG) + RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS); + else + RHS = RHS.getOperand(0); + + SDValue Res = DAG.getNode(ISD::FADD, SL, VT, LHS, RHS, N0->getFlags()); + if (!N0.hasOneUse()) + DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res)); + return Res; + } + case ISD::FMUL: + case AMDGPUISD::FMUL_LEGACY: { + // (fneg (fmul x, y)) -> (fmul x, (fneg y)) + // (fneg (fmul_legacy x, y)) -> (fmul_legacy x, (fneg y)) + SDValue LHS = N0.getOperand(0); + SDValue RHS = N0.getOperand(1); + + if (LHS.getOpcode() == ISD::FNEG) + LHS = LHS.getOperand(0); + else if (RHS.getOpcode() == ISD::FNEG) + RHS = RHS.getOperand(0); + else + RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS); + + SDValue Res = DAG.getNode(Opc, SL, VT, LHS, RHS, N0->getFlags()); + if (!N0.hasOneUse()) + DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res)); + return Res; + } + case ISD::FMA: + case ISD::FMAD: { + if (!mayIgnoreSignedZero(N0)) + return SDValue(); + + // (fneg (fma x, y, z)) -> (fma x, (fneg y), (fneg z)) + SDValue LHS = N0.getOperand(0); + SDValue MHS = N0.getOperand(1); + SDValue RHS = N0.getOperand(2); + + if (LHS.getOpcode() == ISD::FNEG) + LHS = LHS.getOperand(0); + else if (MHS.getOpcode() == ISD::FNEG) + MHS = MHS.getOperand(0); + else + MHS = DAG.getNode(ISD::FNEG, SL, VT, MHS); + + if (RHS.getOpcode() != ISD::FNEG) + RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS); + else + RHS = RHS.getOperand(0); + + SDValue Res = DAG.getNode(Opc, SL, VT, LHS, MHS, RHS); + if (!N0.hasOneUse()) + DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res)); + return Res; + } + case ISD::FMAXNUM: + case ISD::FMINNUM: + case AMDGPUISD::FMAX_LEGACY: + case AMDGPUISD::FMIN_LEGACY: { + // fneg (fmaxnum x, y) -> fminnum (fneg x), (fneg y) + // fneg (fminnum x, y) -> fmaxnum (fneg x), (fneg y) + // fneg (fmax_legacy x, y) -> fmin_legacy (fneg x), (fneg y) + // fneg (fmin_legacy x, y) -> fmax_legacy (fneg x), (fneg y) + + SDValue LHS = N0.getOperand(0); + SDValue RHS = N0.getOperand(1); + + // 0 doesn't have a negated inline immediate. + // TODO: Shouldn't fold 1/2pi either, and should be generalized to other + // operations. + if (isConstantFPZero(RHS)) + return SDValue(); + + SDValue NegLHS = DAG.getNode(ISD::FNEG, SL, VT, LHS); + SDValue NegRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS); + unsigned Opposite = inverseMinMax(Opc); + + SDValue Res = DAG.getNode(Opposite, SL, VT, NegLHS, NegRHS, N0->getFlags()); + if (!N0.hasOneUse()) + DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res)); + return Res; + } + case ISD::FP_EXTEND: + case ISD::FTRUNC: + case ISD::FRINT: + case ISD::FNEARBYINT: // XXX - Should fround be handled? 
+ case ISD::FSIN: + case AMDGPUISD::RCP: + case AMDGPUISD::RCP_LEGACY: + case AMDGPUISD::SIN_HW: { + SDValue CvtSrc = N0.getOperand(0); + if (CvtSrc.getOpcode() == ISD::FNEG) { + // (fneg (fp_extend (fneg x))) -> (fp_extend x) + // (fneg (rcp (fneg x))) -> (rcp x) + return DAG.getNode(Opc, SL, VT, CvtSrc.getOperand(0)); + } + + if (!N0.hasOneUse()) + return SDValue(); + + // (fneg (fp_extend x)) -> (fp_extend (fneg x)) + // (fneg (rcp x)) -> (rcp (fneg x)) + SDValue Neg = DAG.getNode(ISD::FNEG, SL, CvtSrc.getValueType(), CvtSrc); + return DAG.getNode(Opc, SL, VT, Neg, N0->getFlags()); + } + case ISD::FP_ROUND: { + SDValue CvtSrc = N0.getOperand(0); + + if (CvtSrc.getOpcode() == ISD::FNEG) { + // (fneg (fp_round (fneg x))) -> (fp_round x) + return DAG.getNode(ISD::FP_ROUND, SL, VT, + CvtSrc.getOperand(0), N0.getOperand(1)); + } + + if (!N0.hasOneUse()) + return SDValue(); + + // (fneg (fp_round x)) -> (fp_round (fneg x)) + SDValue Neg = DAG.getNode(ISD::FNEG, SL, CvtSrc.getValueType(), CvtSrc); + return DAG.getNode(ISD::FP_ROUND, SL, VT, Neg, N0.getOperand(1)); + } + case ISD::FP16_TO_FP: { + // v_cvt_f32_f16 supports source modifiers on pre-VI targets without legal + // f16, but legalization of f16 fneg ends up pulling it out of the source. + // Put the fneg back as a legal source operation that can be matched later. + SDLoc SL(N); + + SDValue Src = N0.getOperand(0); + EVT SrcVT = Src.getValueType(); + + // fneg (fp16_to_fp x) -> fp16_to_fp (xor x, 0x8000) + SDValue IntFNeg = DAG.getNode(ISD::XOR, SL, SrcVT, Src, + DAG.getConstant(0x8000, SL, SrcVT)); + return DAG.getNode(ISD::FP16_TO_FP, SL, N->getValueType(0), IntFNeg); + } + default: + return SDValue(); + } +} + +SDValue AMDGPUTargetLowering::performFAbsCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + SDValue N0 = N->getOperand(0); + + if (!N0.hasOneUse()) + return SDValue(); + + switch (N0.getOpcode()) { + case ISD::FP16_TO_FP: { + assert(!Subtarget->has16BitInsts() && "should only see if f16 is illegal"); + SDLoc SL(N); + SDValue Src = N0.getOperand(0); + EVT SrcVT = Src.getValueType(); + + // fabs (fp16_to_fp x) -> fp16_to_fp (and x, 0x7fff) + SDValue IntFAbs = DAG.getNode(ISD::AND, SL, SrcVT, Src, + DAG.getConstant(0x7fff, SL, SrcVT)); + return DAG.getNode(ISD::FP16_TO_FP, SL, N->getValueType(0), IntFAbs); + } + default: + return SDValue(); + } +} + SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -2817,6 +3257,10 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, return performMulLoHi24Combine(N, DCI); case ISD::SELECT: return performSelectCombine(N, DCI); + case ISD::FNEG: + return performFNegCombine(N, DCI); + case ISD::FABS: + return performFAbsCombine(N, DCI); case AMDGPUISD::BFE_I32: case AMDGPUISD::BFE_U32: { assert(!N->getValueType(0).isVector() && @@ -2905,6 +3349,18 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, return performLoadCombine(N, DCI); case ISD::STORE: return performStoreCombine(N, DCI); + case AMDGPUISD::CLAMP: + return performClampCombine(N, DCI); + case AMDGPUISD::RCP: { + if (const auto *CFP = dyn_cast(N->getOperand(0))) { + // XXX - Should this flush denormals? 
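
// The FP16_TO_FP rewrites above treat fneg/fabs as pure sign-bit integer
// ops on the half-precision bit pattern, which is why 0x8000 and 0x7fff
// appear as the XOR/AND operands. The same identities in host code (a small
// self-contained check, not part of the lowering):

#include <cassert>
#include <cstdint>

int main() {
  uint16_t H = 0x3C00;            // 1.0 as an IEEE-754 half bit pattern
  uint16_t NegH = H ^ 0x8000;     // fneg: flip the sign bit
  uint16_t AbsH = NegH & 0x7fff;  // fabs: clear the sign bit
  assert(NegH == 0xBC00);         // -1.0
  assert(AbsH == 0x3C00);         // back to 1.0
}
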
+ const APFloat &Val = CFP->getValueAPF(); + APFloat One(Val.getSemantics(), "1.0"); + return DAG.getConstantFP(One / Val, SDLoc(N), N->getValueType(0)); + } + + break; + } } return SDValue(); } @@ -2978,6 +3434,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(DIV_SCALE) NODE_NAME_CASE(DIV_FMAS) NODE_NAME_CASE(DIV_FIXUP) + NODE_NAME_CASE(FMAD_FTZ) NODE_NAME_CASE(TRIG_PREOP) NODE_NAME_CASE(RCP) NODE_NAME_CASE(RSQ) @@ -3020,12 +3477,15 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(CVT_F32_UBYTE1) NODE_NAME_CASE(CVT_F32_UBYTE2) NODE_NAME_CASE(CVT_F32_UBYTE3) + NODE_NAME_CASE(CVT_PKRTZ_F16_F32) NODE_NAME_CASE(BUILD_VERTICAL_VECTOR) NODE_NAME_CASE(CONST_DATA_PTR) NODE_NAME_CASE(PC_ADD_REL_OFFSET) NODE_NAME_CASE(KILL) + NODE_NAME_CASE(DUMMY_CHAIN) case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break; NODE_NAME_CASE(SENDMSG) + NODE_NAME_CASE(SENDMSGHALT) NODE_NAME_CASE(INTERP_MOV) NODE_NAME_CASE(INTERP_P1) NODE_NAME_CASE(INTERP_P2) @@ -3035,6 +3495,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(ATOMIC_CMP_SWAP) NODE_NAME_CASE(ATOMIC_INC) NODE_NAME_CASE(ATOMIC_DEC) + NODE_NAME_CASE(BUFFER_LOAD) + NODE_NAME_CASE(BUFFER_LOAD_FORMAT) case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break; } return nullptr; diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index 8fdf814..12e1ff8 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -102,6 +102,18 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM, setTruncStoreAction(MVT::i32, MVT::i8, Custom); setTruncStoreAction(MVT::i32, MVT::i16, Custom); + // We need to include these since trunc STORES to PRIVATE need + // special handling to accommodate RMW + setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom); + setTruncStoreAction(MVT::v4i32, MVT::v4i16, Custom); + setTruncStoreAction(MVT::v8i32, MVT::v8i16, Custom); + setTruncStoreAction(MVT::v16i32, MVT::v16i16, Custom); + setTruncStoreAction(MVT::v32i32, MVT::v32i16, Custom); + setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom); + setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom); + setTruncStoreAction(MVT::v8i32, MVT::v8i8, Custom); + setTruncStoreAction(MVT::v16i32, MVT::v16i8, Custom); + setTruncStoreAction(MVT::v32i32, MVT::v32i8, Custom); // Workaround for LegalizeDAG asserting on expansion of i1 vector stores. setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand); @@ -212,6 +224,11 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM, setOperationAction(ISD::SUBE, VT, Expand); } + // LLVM will expand these to atomic_cmp_swap(0) + // and atomic_swap, respectively. 
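
// In C++ atomics terms, the two expansions mentioned above look like this
// (a host-side sketch of the equivalence, not the R600 lowering itself):

#include <atomic>

int atomicLoadViaCmpSwap(std::atomic<int> &A) {
  // atomic_load(p) == atomic_cmp_swap(p, 0, 0): compare against 0 and, at
  // most, rewrite the value 0 over itself; either way Expected ends up
  // holding the current contents.
  int Expected = 0;
  A.compare_exchange_strong(Expected, 0);
  return Expected;
}

void atomicStoreViaSwap(std::atomic<int> &A, int V) {
  // atomic_store(p, v) == atomic_swap(p, v), with the old value discarded.
  (void)A.exchange(V);
}
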
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); + setSchedulingPreference(Sched::Source); setTargetDAGCombine(ISD::FP_ROUND); @@ -257,7 +274,7 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::getLDSNoRetOp(MI.getOpcode()))); for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) { - NewMI.addOperand(MI.getOperand(i)); + NewMI.add(MI.getOperand(i)); } } else { return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); @@ -330,34 +347,34 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, case AMDGPU::RAT_WRITE_CACHELESS_64_eg: case AMDGPU::RAT_WRITE_CACHELESS_128_eg: BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode())) - .addOperand(MI.getOperand(0)) - .addOperand(MI.getOperand(1)) + .add(MI.getOperand(0)) + .add(MI.getOperand(1)) .addImm(isEOP(I)); // Set End of program bit break; case AMDGPU::RAT_STORE_TYPED_eg: BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode())) - .addOperand(MI.getOperand(0)) - .addOperand(MI.getOperand(1)) - .addOperand(MI.getOperand(2)) + .add(MI.getOperand(0)) + .add(MI.getOperand(1)) + .add(MI.getOperand(2)) .addImm(isEOP(I)); // Set End of program bit break; case AMDGPU::BRANCH: BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP)) - .addOperand(MI.getOperand(0)); + .add(MI.getOperand(0)); break; case AMDGPU::BRANCH_COND_f32: { MachineInstr *NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X), AMDGPU::PREDICATE_BIT) - .addOperand(MI.getOperand(1)) + .add(MI.getOperand(1)) .addImm(AMDGPU::PRED_SETNE) .addImm(0); // Flags TII->addFlag(*NewMI, 0, MO_FLAG_PUSH); BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND)) - .addOperand(MI.getOperand(0)) + .add(MI.getOperand(0)) .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); break; } @@ -366,12 +383,12 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineInstr *NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X), AMDGPU::PREDICATE_BIT) - .addOperand(MI.getOperand(1)) + .add(MI.getOperand(1)) .addImm(AMDGPU::PRED_SETNE_INT) .addImm(0); // Flags TII->addFlag(*NewMI, 0, MO_FLAG_PUSH); BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND)) - .addOperand(MI.getOperand(0)) + .add(MI.getOperand(0)) .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); break; } @@ -399,13 +416,13 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, return BB; unsigned CfInst = (MI.getOpcode() == AMDGPU::EG_ExportSwz) ? 
       84 : 40;
     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
-        .addOperand(MI.getOperand(0))
-        .addOperand(MI.getOperand(1))
-        .addOperand(MI.getOperand(2))
-        .addOperand(MI.getOperand(3))
-        .addOperand(MI.getOperand(4))
-        .addOperand(MI.getOperand(5))
-        .addOperand(MI.getOperand(6))
+        .add(MI.getOperand(0))
+        .add(MI.getOperand(1))
+        .add(MI.getOperand(2))
+        .add(MI.getOperand(3))
+        .add(MI.getOperand(4))
+        .add(MI.getOperand(5))
+        .add(MI.getOperand(6))
         .addImm(CfInst)
         .addImm(EOP);
     break;
@@ -902,7 +919,7 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
   if (VT == MVT::f32) {
     DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
-    SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
+    SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
     if (MinMax)
       return MinMax;
   }
@@ -1090,79 +1107,136 @@ void R600TargetLowering::getStackAddress(unsigned StackWidth,
 SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
                                                    SelectionDAG &DAG) const {
   SDLoc DL(Store);
+  // TODO: Who creates the i8 stores?
+  assert(Store->isTruncatingStore()
+         || Store->getValue().getValueType() == MVT::i8);
+  assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);
 
-  unsigned Mask = 0;
+  SDValue Mask;
   if (Store->getMemoryVT() == MVT::i8) {
-    Mask = 0xff;
+    assert(Store->getAlignment() >= 1);
+    Mask = DAG.getConstant(0xff, DL, MVT::i32);
   } else if (Store->getMemoryVT() == MVT::i16) {
-    Mask = 0xffff;
+    assert(Store->getAlignment() >= 2);
+    Mask = DAG.getConstant(0xffff, DL, MVT::i32);
+  } else {
+    llvm_unreachable("Unsupported private trunc store");
   }
 
-  SDValue Chain = Store->getChain();
+  SDValue OldChain = Store->getChain();
+  bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
+  // Skip dummy
+  SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
   SDValue BasePtr = Store->getBasePtr();
+  SDValue Offset = Store->getOffset();
   EVT MemVT = Store->getMemoryVT();
 
-  SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, BasePtr,
-                            DAG.getConstant(2, DL, MVT::i32));
-  SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
-                            Chain, Ptr,
-                            DAG.getTargetConstant(0, DL, MVT::i32));
+  SDValue LoadPtr = BasePtr;
+  if (!Offset.isUndef()) {
+    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
+  }
+
+  // Get dword location
+  // TODO: this should be eliminated by the future SHR ptr, 2
+  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
+                            DAG.getConstant(0xfffffffc, DL, MVT::i32));
 
-  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, BasePtr,
+  // Load dword
+  // TODO: can we be smarter about machine pointer info?
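
// What the DAG sequence below assembles is a read-modify-write of the
// containing 32-bit word. The same technique in scalar form (an illustrative
// sketch; Mem/Addr/Val are stand-in names, not values used here):

#include <cstdint>

void storeByteViaRMW(uint32_t *Mem, uint32_t Addr, uint8_t Val) {
  uint32_t *Word = &Mem[Addr >> 2];           // dword containing the byte
  unsigned Shift = (Addr & 3) * 8;            // bit offset within the dword
  uint32_t Mask = 0xffu << Shift;             // mask shifted into place
  uint32_t Old = *Word;                       // load dword
  uint32_t New = (Old & ~Mask)                // clear the target bits
                 | ((uint32_t)Val << Shift);  // merge in the new bits
  *Word = New;                                // store dword
}
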
+  SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo());
+
+  Chain = Dst.getValue(1);
+
+  // Get offset in dword
+  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                                 DAG.getConstant(0x3, DL, MVT::i32));
 
+  // Convert byte offset to bit shift
   SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                  DAG.getConstant(3, DL, MVT::i32));
 
+  // TODO: Contrary to the name of the function,
+  // it also handles sub i32 non-truncating stores (like i1)
   SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
                                   Store->getValue());
 
+  // Mask the value to the right type
   SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
 
+  // Shift the value in place
   SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                      MaskedValue, ShiftAmt);
 
-  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32,
-                                DAG.getConstant(Mask, DL, MVT::i32),
-                                ShiftAmt);
-  DstMask = DAG.getNode(ISD::XOR, DL, MVT::i32, DstMask,
-                        DAG.getConstant(0xffffffff, DL, MVT::i32));
+  // Shift the mask in place
+  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);
+
+  // Invert the mask. NOTE: if we had native ROL instructions we could
+  // use inverted mask
+  DstMask = DAG.getNOT(DL, DstMask, MVT::i32);
+
+  // Cleanup the target bits
   Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
 
+  // Add the new bits
   SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
-  return DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
-                     Chain, Value, Ptr,
-                     DAG.getTargetConstant(0, DL, MVT::i32));
+
+  // Store dword
+  // TODO: Can we be smarter about MachinePointerInfo?
+  SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo());
+
+  // If we are part of an expanded vector, make our neighbors depend on this store
+  if (VectorTrunc) {
+    // Make all other vector elements depend on this store
+    Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
+    DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
+  }
+  return NewStore;
 }
 
 SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
   StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
   unsigned AS = StoreNode->getAddressSpace();
+
+  SDValue Chain = StoreNode->getChain();
+  SDValue Ptr = StoreNode->getBasePtr();
   SDValue Value = StoreNode->getValue();
-  EVT ValueVT = Value.getValueType();
+
+  EVT VT = Value.getValueType();
   EVT MemVT = StoreNode->getMemoryVT();
-  unsigned Align = StoreNode->getAlignment();
+  EVT PtrVT = Ptr.getValueType();
+
+  SDLoc DL(Op);
+
+  // Neither LOCAL nor PRIVATE can do vectors at the moment
   if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
-      ValueVT.isVector()) {
-    return SplitVectorStore(Op, DAG);
+      VT.isVector()) {
+    if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && StoreNode->isTruncatingStore()) {
+      // Add an extra level of chain to isolate this vector
+      SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
+      // TODO: can the chain be replaced without creating a new store?
+ SDValue NewStore = DAG.getTruncStore( + NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(), + MemVT, StoreNode->getAlignment(), + StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo()); + StoreNode = cast(NewStore); + } + + return scalarizeVectorStore(StoreNode, DAG); } - // Private AS needs special fixes - if (Align < MemVT.getStoreSize() && (AS != AMDGPUAS::PRIVATE_ADDRESS) && + unsigned Align = StoreNode->getAlignment(); + if (Align < MemVT.getStoreSize() && !allowsMisalignedMemoryAccesses(MemVT, AS, Align, nullptr)) { return expandUnalignedStore(StoreNode, DAG); } - SDLoc DL(Op); - SDValue Chain = StoreNode->getChain(); - SDValue Ptr = StoreNode->getBasePtr(); + SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr, + DAG.getConstant(2, DL, PtrVT)); if (AS == AMDGPUAS::GLOBAL_ADDRESS) { // It is beneficial to create MSKOR here instead of combiner to avoid // artificial dependencies introduced by RMW if (StoreNode->isTruncatingStore()) { - EVT VT = Value.getValueType(); assert(VT.bitsLE(MVT::i32)); SDValue MaskConstant; if (MemVT == MVT::i8) { @@ -1172,15 +1246,19 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { assert(StoreNode->getAlignment() >= 2); MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32); } - SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr, - DAG.getConstant(2, DL, MVT::i32)); - SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(0x00000003, DL, VT)); + + SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr, + DAG.getConstant(0x00000003, DL, PtrVT)); + SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex, + DAG.getConstant(3, DL, VT)); + + // Put the mask in correct place + SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift); + + // Put the value bits in correct place SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant); - SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex, - DAG.getConstant(3, DL, VT)); - SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift); - SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift); + SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift); + // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32 // vector instead. SDValue Src[4] = { @@ -1194,12 +1272,9 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL, Op->getVTList(), Args, MemVT, StoreNode->getMemOperand()); - } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && - ValueVT.bitsGE(MVT::i32)) { + } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) { // Convert pointer from byte address to dword address. 
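
// Byte-to-dword conversion is a plain two-bit right shift, since four bytes
// share one dword slot; as a stand-alone helper (illustrative only):

#include <cstdint>

static inline uint32_t byteToDwordAddr(uint32_t ByteAddr) {
  return ByteAddr >> 2; // what the DWORDADDR-tagged SRL computes
}
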
- Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(), - DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), - Ptr, DAG.getConstant(2, DL, MVT::i32))); + Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr); if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) { llvm_unreachable("Truncated and indexed stores not supported yet"); @@ -1210,49 +1285,22 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { } } + // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes if (AS != AMDGPUAS::PRIVATE_ADDRESS) return SDValue(); if (MemVT.bitsLT(MVT::i32)) return lowerPrivateTruncStore(StoreNode, DAG); - // Lowering for indirect addressing - const MachineFunction &MF = DAG.getMachineFunction(); - const R600FrameLowering *TFL = getSubtarget()->getFrameLowering(); - unsigned StackWidth = TFL->getStackWidth(MF); - - Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG); - - if (ValueVT.isVector()) { - unsigned NumElemVT = ValueVT.getVectorNumElements(); - EVT ElemVT = ValueVT.getVectorElementType(); - SmallVector Stores(NumElemVT); - - assert(NumElemVT >= StackWidth && "Stack width cannot be greater than " - "vector width in load"); - - for (unsigned i = 0; i < NumElemVT; ++i) { - unsigned Channel, PtrIncr; - getStackAddress(StackWidth, i, Channel, PtrIncr); - Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr, - DAG.getConstant(PtrIncr, DL, MVT::i32)); - SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, - Value, DAG.getConstant(i, DL, MVT::i32)); - - Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, - Chain, Elem, Ptr, - DAG.getTargetConstant(Channel, DL, MVT::i32)); - } - Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores); - } else { - if (ValueVT == MVT::i8) { - Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value); - } - Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr, - DAG.getTargetConstant(0, DL, MVT::i32)); // Channel + // Standard i32+ store, tag it with DWORDADDR to note that the address + // has been shifted + if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) { + Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr); + return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand()); } - return Chain; + // Tagged i32+ stores will be matched by patterns + return SDValue(); } // return (512 + (kc_bank << 12) @@ -1302,51 +1350,50 @@ SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op, LoadSDNode *Load = cast(Op); ISD::LoadExtType ExtType = Load->getExtensionType(); EVT MemVT = Load->getMemoryVT(); + assert(Load->getAlignment() >= MemVT.getStoreSize()); + + SDValue BasePtr = Load->getBasePtr(); + SDValue Chain = Load->getChain(); + SDValue Offset = Load->getOffset(); + + SDValue LoadPtr = BasePtr; + if (!Offset.isUndef()) { + LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset); + } - // getBasePtr(), - DAG.getConstant(2, DL, MVT::i32)); - // Load the Register. - SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(), - Load->getChain(), - Ptr, - DAG.getTargetConstant(0, DL, MVT::i32), - Op.getOperand(2)); + // Load dword + // TODO: can we be smarter about machine pointer info? + SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo()); // Get offset within the register. SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, - Load->getBasePtr(), - DAG.getConstant(0x3, DL, MVT::i32)); + LoadPtr, DAG.getConstant(0x3, DL, MVT::i32)); // Bit offset of target byte (byteIdx * 8). 
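
// The extending-load counterpart of the RMW store above: fetch the
// containing dword, shift the addressed byte down, then sign- or
// zero-extend it. In scalar form (an illustrative sketch with stand-in
// names):

#include <cstdint>

int32_t loadByteSExt(const uint32_t *Mem, uint32_t Addr) {
  uint32_t Word = Mem[Addr >> 2];           // load the containing dword
  unsigned Shift = (Addr & 3) * 8;          // bit offset of the target byte
  return (int32_t)(int8_t)(Word >> Shift);  // SEXTLOAD; cast via uint8_t
                                            // instead for a ZEXTLOAD
}
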
SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx, DAG.getConstant(3, DL, MVT::i32)); // Shift to the right. - Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Ret, ShiftAmt); + SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt); // Eliminate the upper bits by setting them to ... EVT MemEltVT = MemVT.getScalarType(); - // ... ones. - if (ExtType == ISD::SEXTLOAD) { + if (ExtType == ISD::SEXTLOAD) { // ... ones. SDValue MemEltVTNode = DAG.getValueType(MemEltVT); - - SDValue Ops[] = { - DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode), - Load->getChain() - }; - - return DAG.getMergeValues(Ops, DL); + Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode); + } else { // ... or zeros. + Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT); } - // ... or zeros. SDValue Ops[] = { - DAG.getZeroExtendInReg(Ret, DL, MemEltVT), - Load->getChain() + Ret, + Read.getValue(1) // This should be our output chain }; return DAG.getMergeValues(Ops, DL); @@ -1368,12 +1415,10 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = LoadNode->getChain(); SDValue Ptr = LoadNode->getBasePtr(); - if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) { - SDValue MergedValues[2] = { - scalarizeVectorLoad(LoadNode, DAG), - Chain - }; - return DAG.getMergeValues(MergedValues, DL); + if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || + LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) && + VT.isVector()) { + return scalarizeVectorLoad(LoadNode, DAG); } int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace()); @@ -1424,8 +1469,6 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { return DAG.getMergeValues(MergedValues, DL); } - SDValue LoweredLoad; - // For most operations returning SDValue() will result in the node being // expanded by the DAG Legalizer. 
This is not the case for ISD::LOAD, so we // need to manually expand loads that may be legal in some address spaces and @@ -1450,47 +1493,14 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } - // Lowering for indirect addressing - const MachineFunction &MF = DAG.getMachineFunction(); - const R600FrameLowering *TFL = getSubtarget()->getFrameLowering(); - unsigned StackWidth = TFL->getStackWidth(MF); - - Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG); - - if (VT.isVector()) { - unsigned NumElemVT = VT.getVectorNumElements(); - EVT ElemVT = VT.getVectorElementType(); - SDValue Loads[4]; - - assert(NumElemVT <= 4); - assert(NumElemVT >= StackWidth && "Stack width cannot be greater than " - "vector width in load"); - - for (unsigned i = 0; i < NumElemVT; ++i) { - unsigned Channel, PtrIncr; - getStackAddress(StackWidth, i, Channel, PtrIncr); - Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr, - DAG.getConstant(PtrIncr, DL, MVT::i32)); - Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT, - Chain, Ptr, - DAG.getTargetConstant(Channel, DL, MVT::i32), - Op.getOperand(2)); - } - EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElemVT); - LoweredLoad = DAG.getBuildVector(TargetVT, DL, makeArrayRef(Loads, NumElemVT)); - } else { - LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT, - Chain, Ptr, - DAG.getTargetConstant(0, DL, MVT::i32), // Channel - Op.getOperand(2)); + // DWORDADDR ISD marks already shifted address + if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) { + assert(VT == MVT::i32); + Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32)); + Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr); + return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand()); } - - SDValue Ops[2] = { - LoweredLoad, - Chain - }; - - return DAG.getMergeValues(Ops, DL); + return SDValue(); } SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { @@ -1580,7 +1590,7 @@ SDValue R600TargetLowering::LowerFormalArguments( unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset(); unsigned PartOffset = VA.getLocMemOffset(); - unsigned Offset = Subtarget->getExplicitKernelArgOffset() + VA.getLocMemOffset(); + unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) + VA.getLocMemOffset(); MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase); SDValue Arg = DAG.getLoad( diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index c744f55..337782c 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -15,7 +15,11 @@ #include "ARMRegisterInfo.h" #include "ARMUnwindOpAsm.h" -#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" @@ -24,25 +28,33 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCFragment.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstPrinter.h" -#include "llvm/MC/MCObjectFileInfo.h" -#include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" 
+#include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolELF.h" -#include "llvm/MC/MCValue.h" +#include "llvm/MC/SectionKind.h" #include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/ARMEHABI.h" -#include "llvm/Support/TargetParser.h" -#include "llvm/Support/Debug.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/TargetParser.h" #include +#include +#include +#include +#include +#include using namespace llvm; @@ -101,16 +113,21 @@ ARMTargetAsmStreamer::ARMTargetAsmStreamer(MCStreamer &S, bool VerboseAsm) : ARMTargetStreamer(S), OS(OS), InstPrinter(InstPrinter), IsVerboseAsm(VerboseAsm) {} + void ARMTargetAsmStreamer::emitFnStart() { OS << "\t.fnstart\n"; } void ARMTargetAsmStreamer::emitFnEnd() { OS << "\t.fnend\n"; } void ARMTargetAsmStreamer::emitCantUnwind() { OS << "\t.cantunwind\n"; } + void ARMTargetAsmStreamer::emitPersonality(const MCSymbol *Personality) { OS << "\t.personality " << Personality->getName() << '\n'; } + void ARMTargetAsmStreamer::emitPersonalityIndex(unsigned Index) { OS << "\t.personalityindex " << Index << '\n'; } + void ARMTargetAsmStreamer::emitHandlerData() { OS << "\t.handlerdata\n"; } + void ARMTargetAsmStreamer::emitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset) { OS << "\t.setfp\t"; @@ -121,6 +138,7 @@ void ARMTargetAsmStreamer::emitSetFP(unsigned FpReg, unsigned SpReg, OS << ", #" << Offset; OS << '\n'; } + void ARMTargetAsmStreamer::emitMovSP(unsigned Reg, int64_t Offset) { assert((Reg != ARM::SP && Reg != ARM::PC) && "the operand of .movsp cannot be either sp or pc"); @@ -131,9 +149,11 @@ void ARMTargetAsmStreamer::emitMovSP(unsigned Reg, int64_t Offset) { OS << ", #" << Offset; OS << '\n'; } + void ARMTargetAsmStreamer::emitPad(int64_t Offset) { OS << "\t.pad\t#" << Offset << '\n'; } + void ARMTargetAsmStreamer::emitRegSave(const SmallVectorImpl &RegList, bool isVector) { assert(RegList.size() && "RegList should not be empty"); @@ -151,8 +171,9 @@ void ARMTargetAsmStreamer::emitRegSave(const SmallVectorImpl &RegList, OS << "}\n"; } -void ARMTargetAsmStreamer::switchVendor(StringRef Vendor) { -} + +void ARMTargetAsmStreamer::switchVendor(StringRef Vendor) {} + void ARMTargetAsmStreamer::emitAttribute(unsigned Attribute, unsigned Value) { OS << "\t.eabi_attribute\t" << Attribute << ", " << Twine(Value); if (IsVerboseAsm) { @@ -162,6 +183,7 @@ void ARMTargetAsmStreamer::emitAttribute(unsigned Attribute, unsigned Value) { } OS << "\n"; } + void ARMTargetAsmStreamer::emitTextAttribute(unsigned Attribute, StringRef String) { switch (Attribute) { @@ -179,6 +201,7 @@ void ARMTargetAsmStreamer::emitTextAttribute(unsigned Attribute, } OS << "\n"; } + void ARMTargetAsmStreamer::emitIntTextAttribute(unsigned Attribute, unsigned IntValue, StringRef StringValue) { @@ -194,20 +217,25 @@ void ARMTargetAsmStreamer::emitIntTextAttribute(unsigned Attribute, } OS << "\n"; } + void ARMTargetAsmStreamer::emitArch(unsigned Arch) { OS << "\t.arch\t" << ARM::getArchName(Arch) << "\n"; } + void ARMTargetAsmStreamer::emitArchExtension(unsigned ArchExt) { OS << "\t.arch_extension\t" << ARM::getArchExtName(ArchExt) << "\n"; } + void ARMTargetAsmStreamer::emitObjectArch(unsigned Arch) { OS << "\t.object_arch\t" << ARM::getArchName(Arch) << '\n'; } + void ARMTargetAsmStreamer::emitFPU(unsigned FPU) { OS << "\t.fpu\t" << ARM::getFPUName(FPU) << "\n"; } -void 
ARMTargetAsmStreamer::finishAttributeSection() { -} + +void ARMTargetAsmStreamer::finishAttributeSection() {} + void ARMTargetAsmStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *S) { OS << "\t.tlsdescseq\t" << S->getSymbol().getName(); @@ -274,12 +302,12 @@ class ARMTargetELFStreamer : public ARMTargetStreamer { }; StringRef CurrentVendor; - unsigned FPU; - unsigned Arch; - unsigned EmittedArch; + unsigned FPU = ARM::FK_INVALID; + unsigned Arch = ARM::AK_INVALID; + unsigned EmittedArch = ARM::AK_INVALID; SmallVector Contents; - MCSection *AttributeSection; + MCSection *AttributeSection = nullptr; AttributeItem *getAttributeItem(unsigned Attribute) { for (size_t i = 0; i < Contents.size(); ++i) @@ -393,9 +421,7 @@ class ARMTargetELFStreamer : public ARMTargetStreamer { public: ARMTargetELFStreamer(MCStreamer &S) - : ARMTargetStreamer(S), CurrentVendor("aeabi"), FPU(ARM::FK_INVALID), - Arch(ARM::AK_INVALID), EmittedArch(ARM::AK_INVALID), - AttributeSection(nullptr) {} + : ARMTargetStreamer(S), CurrentVendor("aeabi") {} }; /// Extend the generic ELFStreamer class so that it can emit mapping symbols at @@ -416,12 +442,11 @@ class ARMELFStreamer : public MCELFStreamer { ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool IsThumb) - : MCELFStreamer(Context, TAB, OS, Emitter), IsThumb(IsThumb), - MappingSymbolCounter(0), LastEMS(EMS_None) { + : MCELFStreamer(Context, TAB, OS, Emitter), IsThumb(IsThumb) { EHReset(); } - ~ARMELFStreamer() {} + ~ARMELFStreamer() override = default; void FinishImpl() override; @@ -601,10 +626,10 @@ class ARMELFStreamer : public MCELFStreamer { void EmitFixup(const MCExpr *Expr, MCFixupKind Kind); bool IsThumb; - int64_t MappingSymbolCounter; + int64_t MappingSymbolCounter = 0; DenseMap LastMappingSymbols; - ElfMappingSymbol LastEMS; + ElfMappingSymbol LastEMS = EMS_None; // ARM Exception Handling Frame Information MCSymbol *ExTab; @@ -620,6 +645,7 @@ class ARMELFStreamer : public MCELFStreamer { SmallVector Opcodes; UnwindOpcodeAssembler UnwindOpAsm; }; + } // end anonymous namespace ARMELFStreamer &ARMTargetELFStreamer::getStreamer() { @@ -629,33 +655,42 @@ ARMELFStreamer &ARMTargetELFStreamer::getStreamer() { void ARMTargetELFStreamer::emitFnStart() { getStreamer().emitFnStart(); } void ARMTargetELFStreamer::emitFnEnd() { getStreamer().emitFnEnd(); } void ARMTargetELFStreamer::emitCantUnwind() { getStreamer().emitCantUnwind(); } + void ARMTargetELFStreamer::emitPersonality(const MCSymbol *Personality) { getStreamer().emitPersonality(Personality); } + void ARMTargetELFStreamer::emitPersonalityIndex(unsigned Index) { getStreamer().emitPersonalityIndex(Index); } + void ARMTargetELFStreamer::emitHandlerData() { getStreamer().emitHandlerData(); } + void ARMTargetELFStreamer::emitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset) { getStreamer().emitSetFP(FpReg, SpReg, Offset); } + void ARMTargetELFStreamer::emitMovSP(unsigned Reg, int64_t Offset) { getStreamer().emitMovSP(Reg, Offset); } + void ARMTargetELFStreamer::emitPad(int64_t Offset) { getStreamer().emitPad(Offset); } + void ARMTargetELFStreamer::emitRegSave(const SmallVectorImpl &RegList, bool isVector) { getStreamer().emitRegSave(RegList, isVector); } + void ARMTargetELFStreamer::emitUnwindRaw(int64_t Offset, const SmallVectorImpl &Opcodes) { getStreamer().emitUnwindRaw(Offset, Opcodes); } + void ARMTargetELFStreamer::switchVendor(StringRef Vendor) { assert(!Vendor.empty() && "Vendor cannot be empty."); @@ -670,25 +705,31 @@ void 
ARMTargetELFStreamer::switchVendor(StringRef Vendor) { CurrentVendor = Vendor; } + void ARMTargetELFStreamer::emitAttribute(unsigned Attribute, unsigned Value) { setAttributeItem(Attribute, Value, /* OverwriteExisting= */ true); } + void ARMTargetELFStreamer::emitTextAttribute(unsigned Attribute, StringRef Value) { setAttributeItem(Attribute, Value, /* OverwriteExisting= */ true); } + void ARMTargetELFStreamer::emitIntTextAttribute(unsigned Attribute, unsigned IntValue, StringRef StringValue) { setAttributeItems(Attribute, IntValue, StringValue, /* OverwriteExisting= */ true); } + void ARMTargetELFStreamer::emitArch(unsigned Value) { Arch = Value; } + void ARMTargetELFStreamer::emitObjectArch(unsigned Value) { EmittedArch = Value; } + void ARMTargetELFStreamer::emitArchDefaultAttributes() { using namespace ARMBuildAttrs; @@ -788,9 +829,11 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() { break; } } + void ARMTargetELFStreamer::emitFPU(unsigned Value) { FPU = Value; } + void ARMTargetELFStreamer::emitFPUDefaultAttributes() { switch (FPU) { case ARM::FK_VFP: @@ -922,6 +965,7 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() { break; } } + size_t ARMTargetELFStreamer::calculateContentSize() const { size_t Result = 0; for (size_t i = 0; i < Contents.size(); ++i) { @@ -946,6 +990,7 @@ size_t ARMTargetELFStreamer::calculateContentSize() const { } return Result; } + void ARMTargetELFStreamer::finishAttributeSection() { // // [ "vendor-name" @@ -1095,9 +1140,8 @@ inline void ARMELFStreamer::SwitchToEHSection(StringRef Prefix, const MCSymbolELF *Group = FnSection.getGroup(); if (Group) Flags |= ELF::SHF_GROUP; - MCSectionELF *EHSection = - getContext().getELFSection(EHSecName, Type, Flags, 0, Group, - FnSection.getUniqueID(), nullptr, &FnSection); + MCSectionELF *EHSection = getContext().getELFSection( + EHSecName, Type, Flags, 0, Group, FnSection.getUniqueID(), &FnSection); assert(EHSection && "Failed to get the required EH section"); @@ -1116,6 +1160,7 @@ inline void ARMELFStreamer::SwitchToExIdxSection(const MCSymbol &FnStart) { ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER, SectionKind::getData(), FnStart); } + void ARMELFStreamer::EmitFixup(const MCExpr *Expr, MCFixupKind Kind) { MCDataFragment *Frag = getOrCreateDataFragment(); Frag->getFixups().push_back(MCFixup::create(Frag->getContents().size(), Expr, @@ -1398,8 +1443,6 @@ MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, if (RelaxAll) S->getAssembler().setRelaxAll(true); return S; - } - } - +} // end namespace llvm diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 872b49a..7a8e01b 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -2423,12 +2423,22 @@ bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I, if (OpReg == 0) return false; + unsigned ImplicitDefReg; + if (Subtarget->hasAVX()) { + ImplicitDefReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg); + + } + unsigned ResultReg = createResultReg(RC); MachineInstrBuilder MIB; MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc), ResultReg); + if (Subtarget->hasAVX()) - MIB.addReg(OpReg); + MIB.addReg(ImplicitDefReg); + MIB.addReg(OpReg); updateValueMap(I, ResultReg); return true; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 45194a9..d394839 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ 
-17,6 +17,7 @@ #include "X86CallingConv.h" #include "X86FrameLowering.h" #include "X86InstrBuilder.h" +#include "X86IntrinsicsInfo.h" #include "X86MachineFunctionInfo.h" #include "X86ShuffleDecodeConstantPool.h" #include "X86TargetMachine.h" @@ -53,10 +54,10 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetOptions.h" -#include "X86IntrinsicsInfo.h" +#include #include -#include #include +#include using namespace llvm; #define DEBUG_TYPE "x86-isel" @@ -69,6 +70,13 @@ static cl::opt ExperimentalVectorWideningLegalization( "rather than promotion."), cl::Hidden); +static cl::opt ExperimentalPrefLoopAlignment( + "x86-experimental-pref-loop-alignment", cl::init(4), + cl::desc("Sets the preferable loop alignment for experiments " + "(the last x86-experimental-pref-loop-alignment bits" + " of the loop header PC will be 0)."), + cl::Hidden); + X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, const X86Subtarget &STI) : TargetLowering(TM), Subtarget(STI) { @@ -96,12 +104,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister()); - // Bypass expensive divides on Atom when compiling with O2. + // Bypass expensive divides and use cheaper ones. if (TM.getOptLevel() >= CodeGenOpt::Default) { if (Subtarget.hasSlowDivide32()) addBypassSlowDiv(32, 8); if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit()) - addBypassSlowDiv(64, 16); + addBypassSlowDiv(64, 32); } if (Subtarget.isTargetKnownWindowsMSVC() || @@ -781,6 +789,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); @@ -921,6 +930,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // SSE41 brings specific instructions for doing vector sign extend even in // cases where we don't have SRA. 
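
// Without a native arithmetic shift (SRA) for the element type, a sign
// extend is typically emitted as a shift-left plus arithmetic-shift-right of
// a wider lane; the scalar version of that fallback (illustrative, not
// backend code):

#include <cassert>
#include <cstdint>

int32_t signExtend16ViaShifts(uint32_t X) {
  // (x << 16) >> 16 with an arithmetic shift replicates bit 15 into the
  // upper half, which is what SSE4.1's pmovsxwd does directly per lane.
  return (int32_t)(X << 16) >> 16;
}

int main() {
  assert(signExtend16ViaShifts(0xFFFF) == -1);
  assert(signExtend16ViaShifts(0x7FFF) == 32767);
}
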
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Legal); + setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Legal); + setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Legal); + + setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v2i64, Legal); + setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v4i32, Legal); + setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v8i16, Legal); + for (MVT VT : MVT::integer_vector_valuetypes()) { setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom); setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom); @@ -1125,7 +1142,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal); setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); } @@ -1279,6 +1296,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); setOperationAction(ISD::ZERO_EXTEND, MVT::v4i32, Custom); setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Custom); // FIXME. This commands are available on SSE/AVX2, add relevant patterns. setLoadExtAction(ISD::EXTLOAD, MVT::v8i32, MVT::v8i8, Legal); @@ -1305,10 +1324,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v8i16, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom); - if (Subtarget.hasDQI()) { - setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Custom); - } + for (auto VT : { MVT::v16f32, MVT::v8f64 }) { setOperationAction(ISD::FFLOOR, VT, Legal); setOperationAction(ISD::FCEIL, VT, Legal); @@ -1357,12 +1373,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UMIN, MVT::v16i32, Legal); setOperationAction(ISD::UMIN, MVT::v8i64, Legal); - setOperationAction(ISD::ADD, MVT::v8i1, Expand); - setOperationAction(ISD::ADD, MVT::v16i1, Expand); - setOperationAction(ISD::SUB, MVT::v8i1, Expand); - setOperationAction(ISD::SUB, MVT::v16i1, Expand); - setOperationAction(ISD::MUL, MVT::v8i1, Expand); - setOperationAction(ISD::MUL, MVT::v16i1, Expand); + setOperationAction(ISD::ADD, MVT::v8i1, Custom); + setOperationAction(ISD::ADD, MVT::v16i1, Custom); + setOperationAction(ISD::SUB, MVT::v8i1, Custom); + setOperationAction(ISD::SUB, MVT::v16i1, Custom); + setOperationAction(ISD::MUL, MVT::v8i1, Custom); + setOperationAction(ISD::MUL, MVT::v16i1, Custom); setOperationAction(ISD::MUL, MVT::v16i32, Legal); @@ -1441,7 +1457,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::VSELECT, VT, Legal); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal); setOperationAction(ISD::MLOAD, VT, Legal); setOperationAction(ISD::MSTORE, VT, Legal); setOperationAction(ISD::MGATHER, VT, Legal); @@ -1460,12 +1476,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, addRegisterClass(MVT::v32i1, 
&X86::VK32RegClass); addRegisterClass(MVT::v64i1, &X86::VK64RegClass); - setOperationAction(ISD::ADD, MVT::v32i1, Expand); - setOperationAction(ISD::ADD, MVT::v64i1, Expand); - setOperationAction(ISD::SUB, MVT::v32i1, Expand); - setOperationAction(ISD::SUB, MVT::v64i1, Expand); - setOperationAction(ISD::MUL, MVT::v32i1, Expand); - setOperationAction(ISD::MUL, MVT::v64i1, Expand); + setOperationAction(ISD::ADD, MVT::v32i1, Custom); + setOperationAction(ISD::ADD, MVT::v64i1, Custom); + setOperationAction(ISD::SUB, MVT::v32i1, Custom); + setOperationAction(ISD::SUB, MVT::v64i1, Custom); + setOperationAction(ISD::MUL, MVT::v32i1, Custom); + setOperationAction(ISD::MUL, MVT::v64i1, Custom); setOperationAction(ISD::SETCC, MVT::v32i1, Custom); setOperationAction(ISD::SETCC, MVT::v64i1, Custom); @@ -1479,8 +1495,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Legal); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Legal); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i1, Custom); @@ -1574,9 +1590,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, addRegisterClass(MVT::v2i1, &X86::VK2RegClass); for (auto VT : { MVT::v2i1, MVT::v4i1 }) { - setOperationAction(ISD::ADD, VT, Expand); - setOperationAction(ISD::SUB, VT, Expand); - setOperationAction(ISD::MUL, VT, Expand); + setOperationAction(ISD::ADD, VT, Custom); + setOperationAction(ISD::SUB, VT, Custom); + setOperationAction(ISD::MUL, VT, Custom); setOperationAction(ISD::VSELECT, VT, Expand); setOperationAction(ISD::TRUNCATE, VT, Custom); @@ -1671,6 +1687,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine(ISD::VECTOR_SHUFFLE); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); + setTargetDAGCombine(ISD::INSERT_SUBVECTOR); setTargetDAGCombine(ISD::BITCAST); setTargetDAGCombine(ISD::VSELECT); setTargetDAGCombine(ISD::SELECT); @@ -1696,6 +1713,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setTargetDAGCombine(ISD::ANY_EXTEND); setTargetDAGCombine(ISD::SIGN_EXTEND); setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); + setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG); + setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG); setTargetDAGCombine(ISD::SINT_TO_FP); setTargetDAGCombine(ISD::UINT_TO_FP); setTargetDAGCombine(ISD::SETCC); @@ -1712,7 +1731,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, MaxStoresPerMemcpyOptSize = 4; MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores MaxStoresPerMemmoveOptSize = 4; - setPrefLoopAlignment(4); // 2^4 bytes. + // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4). + setPrefLoopAlignment(ExperimentalPrefLoopAlignment); // An out-of-order CPU can speculatively execute past a predictable branch, // but a conditional move could be stalled by an expensive earlier operation. 
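
// The tradeoff described above, in compilable form: when the branch is
// predictable, the branchy version lets the CPU speculate past the slow
// multiply, while a cmov would have to wait for it on every element
// (an illustrative example, not taken from the X86 backend):

#include <cstdint>

int64_t sumPositive(const int64_t *A, int N, int64_t Expensive) {
  int64_t S = 0;
  for (int I = 0; I < N; ++I) {
    if (A[I] > 0)            // predictable branch: speculation hides latency
      S += A[I] * Expensive; // expensive op a cmov would serialize on
  }
  return S;
}
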
@@ -2001,21 +2021,37 @@ unsigned X86TargetLowering::getAddressSpace() const { return 256; } -Value *X86TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const { - // glibc has a special slot for the stack guard in tcbhead_t, use it instead - // of the usual global variable (see sysdeps/{i386,x86_64}/nptl/tls.h) - if (!Subtarget.isTargetGlibc()) - return TargetLowering::getIRStackGuard(IRB); - - // %fs:0x28, unless we're using a Kernel code model, in which case it's %gs: - // %gs:0x14 on i386 - unsigned Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14; - unsigned AddressSpace = getAddressSpace(); +static bool hasStackGuardSlotTLS(const Triple &TargetTriple) { + return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() || + (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17)); +} + +static Constant* SegmentOffset(IRBuilder<> &IRB, + unsigned Offset, unsigned AddressSpace) { return ConstantExpr::getIntToPtr( ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset), Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace)); } +Value *X86TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const { + // glibc, bionic, and Fuchsia have a special slot for the stack guard in + // tcbhead_t; use it instead of the usual global variable (see + // sysdeps/{i386,x86_64}/nptl/tls.h) + if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) { + if (Subtarget.isTargetFuchsia()) { + // defines MX_TLS_STACK_GUARD_OFFSET with this value. + return SegmentOffset(IRB, 0x10, 257); + } else { + // %fs:0x28, unless we're using a Kernel code model, in which case + // it's %gs:0x28. gs:0x14 on i386. + unsigned Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14; + return SegmentOffset(IRB, Offset, getAddressSpace()); + } + } + + return TargetLowering::getIRStackGuard(IRB); +} + void X86TargetLowering::insertSSPDeclarations(Module &M) const { // MSVC CRT provides functionalities for stack protection. if (Subtarget.getTargetTriple().isOSMSVCRT()) { @@ -2032,8 +2068,8 @@ void X86TargetLowering::insertSSPDeclarations(Module &M) const { SecurityCheckCookie->addAttribute(1, Attribute::AttrKind::InReg); return; } - // glibc has a special slot for the stack guard. - if (Subtarget.isTargetGlibc()) + // glibc, bionic, and Fuchsia have a special slot for the stack guard. + if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) return; TargetLowering::insertSSPDeclarations(M); } @@ -2056,21 +2092,23 @@ Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const { if (Subtarget.getTargetTriple().isOSContiki()) return getDefaultSafeStackPointerLocation(IRB, false); - if (!Subtarget.isTargetAndroid()) - return TargetLowering::getSafeStackPointerLocation(IRB); - // Android provides a fixed TLS slot for the SafeStack pointer. See the // definition of TLS_SLOT_SAFESTACK in // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h - unsigned AddressSpace, Offset; + if (Subtarget.isTargetAndroid()) { + // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs: + // %gs:0x24 on i386 + unsigned Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24; + return SegmentOffset(IRB, Offset, getAddressSpace()); + } - // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs: - // %gs:0x24 on i386 - Offset = (Subtarget.is64Bit()) ? 
0x48 : 0x24; - AddressSpace = getAddressSpace(); - return ConstantExpr::getIntToPtr( - ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset), - Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace)); + // Fuchsia is similar. + if (Subtarget.isTargetFuchsia()) { + // defines MX_TLS_UNSAFE_SP_OFFSET with this value. + return SegmentOffset(IRB, 0x18, 257); + } + + return TargetLowering::getSafeStackPointerLocation(IRB); } bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS, @@ -2669,6 +2707,7 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt); bool isImmutable = !AlwaysUseMutable && !Flags.isByVal(); EVT ValVT; + MVT PtrVT = getPointerTy(DAG.getDataLayout()); // If value is passed by pointer we have address passed instead of the value // itself. No need to extend if the mask value and location share the same @@ -2707,30 +2746,71 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, if (CallConv == CallingConv::X86_INTR) { MFI.setObjectOffset(FI, Offset); } - return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); - } else { - int FI = MFI.CreateFixedObject(ValVT.getSizeInBits()/8, - VA.getLocMemOffset(), isImmutable); - - // Set SExt or ZExt flag. - if (VA.getLocInfo() == CCValAssign::ZExt) { - MFI.setObjectZExt(FI, true); - } else if (VA.getLocInfo() == CCValAssign::SExt) { - MFI.setObjectSExt(FI, true); + return DAG.getFrameIndex(FI, PtrVT); + } + + // This is an argument in memory. We might be able to perform copy elision. + if (Flags.isCopyElisionCandidate()) { + EVT ArgVT = Ins[i].ArgVT; + SDValue PartAddr; + if (Ins[i].PartOffset == 0) { + // If this is a one-part value or the first part of a multi-part value, + // create a stack object for the entire argument value type and return a + // load from our portion of it. This assumes that if the first part of an + // argument is in memory, the rest will also be in memory. + int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(), + /*Immutable=*/false); + PartAddr = DAG.getFrameIndex(FI, PtrVT); + return DAG.getLoad( + ValVT, dl, Chain, PartAddr, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); + } else { + // This is not the first piece of an argument in memory. See if there is + // already a fixed stack object including this offset. If so, assume it + // was created by the PartOffset == 0 branch above and create a load from + // the appropriate offset into it. + int64_t PartBegin = VA.getLocMemOffset(); + int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8; + int FI = MFI.getObjectIndexBegin(); + for (; MFI.isFixedObjectIndex(FI); ++FI) { + int64_t ObjBegin = MFI.getObjectOffset(FI); + int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI); + if (ObjBegin <= PartBegin && PartEnd <= ObjEnd) + break; + } + if (MFI.isFixedObjectIndex(FI)) { + SDValue Addr = + DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT), + DAG.getIntPtrConstant(Ins[i].PartOffset, dl)); + return DAG.getLoad( + ValVT, dl, Chain, Addr, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI, + Ins[i].PartOffset)); + } } + } - // Adjust SP offset of interrupt parameter. - if (CallConv == CallingConv::X86_INTR) { - MFI.setObjectOffset(FI, Offset); - } + int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8, + VA.getLocMemOffset(), isImmutable); + + // Set SExt or ZExt flag. 
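// Illustrative sketch, not part of the patch: the TLS slot selection used by
// getIRStackGuard/getSafeStackPointerLocation above, modeled standalone
// (LLVM's x86 convention: address space 256 is %gs, 257 is %fs).
#include <cstdio>

struct TLSSlot { unsigned AddrSpace; unsigned Offset; };

TLSSlot stackGuardSlot(bool IsFuchsia, bool Is64Bit, unsigned DefaultAS) {
  if (IsFuchsia)
    return {257, 0x10}; // %fs:0x10, the MX_TLS_STACK_GUARD_OFFSET slot
  return {DefaultAS, Is64Bit ? 0x28u : 0x14u}; // glibc/bionic tcbhead_t slot
}

int main() {
  TLSSlot S = stackGuardSlot(/*IsFuchsia=*/false, /*Is64Bit=*/true, 257);
  std::printf("addrspace(%u):0x%x\n", S.AddrSpace, S.Offset); // 257:0x28
}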
+ if (VA.getLocInfo() == CCValAssign::ZExt) { + MFI.setObjectZExt(FI, true); + } else if (VA.getLocInfo() == CCValAssign::SExt) { + MFI.setObjectSExt(FI, true); + } - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); - SDValue Val = DAG.getLoad( - ValVT, dl, Chain, FIN, - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); - return ExtendedInMem ? - DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val) : Val; + // Adjust SP offset of interrupt parameter. + if (CallConv == CallingConv::X86_INTR) { + MFI.setObjectOffset(FI, Offset); } + + SDValue FIN = DAG.getFrameIndex(FI, PtrVT); + SDValue Val = DAG.getLoad( + ValVT, dl, Chain, FIN, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); + return ExtendedInMem ? DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val) + : Val; } // FIXME: Get this from tablegen. @@ -2781,6 +2861,15 @@ static ArrayRef get64BitArgumentXMMs(MachineFunction &MF, return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit)); } +#ifndef NDEBUG +static bool isSortedByValueNo(const SmallVectorImpl &ArgLocs) { + return std::is_sorted(ArgLocs.begin(), ArgLocs.end(), + [](const CCValAssign &A, const CCValAssign &B) -> bool { + return A.getValNo() < B.getValNo(); + }); +} +#endif + SDValue X86TargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, @@ -2815,11 +2904,22 @@ SDValue X86TargetLowering::LowerFormalArguments( SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); - // Allocate shadow area for Win64 + // Allocate shadow area for Win64. if (IsWin64) CCInfo.AllocateStack(32, 8); - CCInfo.AnalyzeFormalArguments(Ins, CC_X86); + CCInfo.AnalyzeArguments(Ins, CC_X86); + + // In vectorcall calling convention a second pass is required for the HVA + // types. + if (CallingConv::X86_VectorCall == CallConv) { + CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86); + } + + // The next loop assumes that the locations are in the same order of the + // input arguments. + assert(isSortedByValueNo(ArgLocs) && + "Argument Location list must be sorted before lowering"); SDValue ArgValue; for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E; @@ -3263,11 +3363,17 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); - // Allocate shadow area for Win64 + // Allocate shadow area for Win64. if (IsWin64) CCInfo.AllocateStack(32, 8); - CCInfo.AnalyzeCallOperands(Outs, CC_X86); + CCInfo.AnalyzeArguments(Outs, CC_X86); + + // In vectorcall calling convention a second pass is required for the HVA + // types. + if (CallingConv::X86_VectorCall == CallConv) { + CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86); + } // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getAlignedCallFrameSize(); @@ -3322,6 +3428,11 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVector MemOpChains; SDValue StackPtr; + // The next loop assumes that the locations are in the same order of the + // input arguments. + assert(isSortedByValueNo(ArgLocs) && + "Argument Location list must be sorted before lowering"); + // Walk the register/memloc assignments, inserting copies/loads. In the case // of tail call optimization arguments are handle later. 
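// Illustrative sketch, not part of the patch: the copy-elision path in
// LowerMemArgument above searches the existing fixed stack objects for one
// whose byte range covers the argument piece. Standalone model:
#include <cstdint>
#include <vector>

struct FixedObject { int64_t Begin; int64_t Size; };

// Returns the index of a fixed object covering [PartBegin, PartEnd), or -1.
int findCoveringObject(const std::vector<FixedObject> &Objs,
                       int64_t PartBegin, int64_t PartEnd) {
  for (int FI = 0, E = (int)Objs.size(); FI != E; ++FI) {
    int64_t ObjBegin = Objs[FI].Begin;
    int64_t ObjEnd = ObjBegin + Objs[FI].Size;
    if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
      return FI; // load from FI at offset PartBegin - ObjBegin
  }
  return -1; // no covering object; the caller creates a fresh one
}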
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); @@ -4103,6 +4214,7 @@ static bool isTargetShuffleVariableMask(unsigned Opcode) { return true; // 'Faux' Target Shuffles. case ISD::AND: + case X86ISD::ANDNP: return true; } } @@ -4419,6 +4531,11 @@ bool X86TargetLowering::isCtlzFast() const { return Subtarget.hasFastLZCNT(); } +bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial( + const Instruction &AndI) const { + return true; +} + bool X86TargetLowering::hasAndNotCompare(SDValue Y) const { if (!Subtarget.hasBMI()) return false; @@ -4741,9 +4858,10 @@ static SDValue getConstVector(ArrayRef Values, MVT VT, SelectionDAG &DAG, return ConstsNode; } -static SDValue getConstVector(ArrayRef Bits, SmallBitVector &Undefs, +static SDValue getConstVector(ArrayRef Bits, APInt &Undefs, MVT VT, SelectionDAG &DAG, const SDLoc &dl) { - assert(Bits.size() == Undefs.size() && "Unequal constant and undef arrays"); + assert(Bits.size() == Undefs.getBitWidth() && + "Unequal constant and undef arrays"); SmallVector Ops; bool Split = false; @@ -4815,10 +4933,6 @@ static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT, VT.getVectorNumElements()/Factor); - // Extract from UNDEF is UNDEF. - if (Vec.isUndef()) - return DAG.getUNDEF(ResultVT); - // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits(); assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"); @@ -4889,50 +5003,6 @@ static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal, static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, const SDLoc &dl) { assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!"); - - // For insertion into the zero index (low half) of a 256-bit vector, it is - // more efficient to generate a blend with immediate instead of an insert*128. - // We are still creating an INSERT_SUBVECTOR below with an undef node to - // extend the subvector to the size of the result vector. Make sure that - // we are not recursing on that node by checking for undef here. - if (IdxVal == 0 && Result.getValueType().is256BitVector() && - !Result.isUndef()) { - EVT ResultVT = Result.getValueType(); - SDValue ZeroIndex = DAG.getIntPtrConstant(0, dl); - SDValue Undef = DAG.getUNDEF(ResultVT); - SDValue Vec256 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Undef, - Vec, ZeroIndex); - - // The blend instruction, and therefore its mask, depend on the data type. - MVT ScalarType = ResultVT.getVectorElementType().getSimpleVT(); - if (ScalarType.isFloatingPoint()) { - // Choose either vblendps (float) or vblendpd (double). - unsigned ScalarSize = ScalarType.getSizeInBits(); - assert((ScalarSize == 64 || ScalarSize == 32) && "Unknown float type"); - unsigned MaskVal = (ScalarSize == 64) ? 0x03 : 0x0f; - SDValue Mask = DAG.getConstant(MaskVal, dl, MVT::i8); - return DAG.getNode(X86ISD::BLENDI, dl, ResultVT, Result, Vec256, Mask); - } - - const X86Subtarget &Subtarget = - static_cast(DAG.getSubtarget()); - - // AVX2 is needed for 256-bit integer blend support. - // Integers must be cast to 32-bit because there is only vpblendd; - // vpblendw can't be used for this because it has a handicapped mask. - - // If we don't have AVX2, then cast to float. Using a wrong domain blend - // is still more efficient than using the wrong domain vinsertf128 that - // will be created by InsertSubVector(). 
- MVT CastVT = Subtarget.hasAVX2() ? MVT::v8i32 : MVT::v8f32; - - SDValue Mask = DAG.getConstant(0x0f, dl, MVT::i8); - Result = DAG.getBitcast(CastVT, Result); - Vec256 = DAG.getBitcast(CastVT, Vec256); - Vec256 = DAG.getNode(X86ISD::BLENDI, dl, CastVT, Result, Vec256, Mask); - return DAG.getBitcast(ResultVT, Vec256); - } - return insertSubVector(Result, Vec, IdxVal, DAG, dl, 128); } @@ -4994,7 +5064,8 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG, if (Vec.isUndef()) { if (IdxVal != 0) { SDValue ShiftBits = DAG.getConstant(IdxVal, dl, MVT::i8); - WideSubVec = DAG.getNode(X86ISD::VSHLI, dl, WideOpVT, WideSubVec, ShiftBits); + WideSubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec, + ShiftBits); } return ExtractSubVec(WideSubVec); } @@ -5003,9 +5074,9 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG, NumElems = WideOpVT.getVectorNumElements(); unsigned ShiftLeft = NumElems - SubVecNumElems; unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal; - Vec = DAG.getNode(X86ISD::VSHLI, dl, WideOpVT, WideSubVec, - DAG.getConstant(ShiftLeft, dl, MVT::i8)); - Vec = ShiftRight ? DAG.getNode(X86ISD::VSRLI, dl, WideOpVT, Vec, + Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec, + DAG.getConstant(ShiftLeft, dl, MVT::i8)); + Vec = ShiftRight ? DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, DAG.getConstant(ShiftRight, dl, MVT::i8)) : Vec; return ExtractSubVec(Vec); } @@ -5014,8 +5085,8 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG, // Zero lower bits of the Vec SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8); Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx); - Vec = DAG.getNode(X86ISD::VSRLI, dl, WideOpVT, Vec, ShiftBits); - Vec = DAG.getNode(X86ISD::VSHLI, dl, WideOpVT, Vec, ShiftBits); + Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits); + Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits); // Merge them together, SubVec should be zero extended. WideSubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, getZeroVector(WideOpVT, Subtarget, DAG, dl), @@ -5027,12 +5098,12 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG, // Simple case when we put subvector in the upper part if (IdxVal + SubVecNumElems == NumElems) { // Zero upper bits of the Vec - WideSubVec = DAG.getNode(X86ISD::VSHLI, dl, WideOpVT, WideSubVec, + WideSubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec, DAG.getConstant(IdxVal, dl, MVT::i8)); SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8); Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx); - Vec = DAG.getNode(X86ISD::VSHLI, dl, WideOpVT, Vec, ShiftBits); - Vec = DAG.getNode(X86ISD::VSRLI, dl, WideOpVT, Vec, ShiftBits); + Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits); + Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits); Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, WideSubVec); return ExtractSubVec(Vec); } @@ -5065,8 +5136,7 @@ static SDValue concat256BitVectors(SDValue V1, SDValue V2, EVT VT, } /// Returns a vector of specified type with all bits set. -/// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with -/// no AVX2 support, use two <4 x i32> inserted in a <8 x i32> appropriately. +/// Always build ones vectors as <4 x i32>, <8 x i32> or <16 x i32>. /// Then bitcast to their original type, ensuring they get CSE'd. 
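// Illustrative sketch, not part of the patch: the KSHIFTL/KSHIFTR pair in
// insert1BitVector above places a SubBits-wide mask at bit Idx of an
// NumElems-wide mask while zeroing every other lane. Standalone model
// (assumes Sub has no bits set above SubBits and NumElems <= 64):
#include <cassert>
#include <cstdint>

uint64_t placeSubMask(uint64_t Sub, unsigned SubBits, unsigned Idx,
                      unsigned NumElems) {
  unsigned ShiftLeft = NumElems - SubBits;        // push the value to the top
  unsigned ShiftRight = NumElems - SubBits - Idx; // then back down to Idx
  uint64_t V = Sub << ShiftLeft;
  return ShiftRight ? (V >> ShiftRight) : V;
}

int main() {
  // Insert the 4-bit mask 0b1011 at element 8 of a 16-element mask.
  assert(placeSubMask(0b1011, 4, 8, 16) == (0b1011ull << 8));
}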
static SDValue getOnesVector(EVT VT, const X86Subtarget &Subtarget, SelectionDAG &DAG, const SDLoc &dl) { @@ -5075,16 +5145,30 @@ static SDValue getOnesVector(EVT VT, const X86Subtarget &Subtarget, APInt Ones = APInt::getAllOnesValue(32); unsigned NumElts = VT.getSizeInBits() / 32; - SDValue Vec; - if (!Subtarget.hasInt256() && NumElts == 8) { - Vec = DAG.getConstant(Ones, dl, MVT::v4i32); - Vec = concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl); - } else { - Vec = DAG.getConstant(Ones, dl, MVT::getVectorVT(MVT::i32, NumElts)); - } + SDValue Vec = DAG.getConstant(Ones, dl, MVT::getVectorVT(MVT::i32, NumElts)); return DAG.getBitcast(VT, Vec); } +static SDValue getExtendInVec(unsigned Opc, const SDLoc &DL, EVT VT, SDValue In, + SelectionDAG &DAG) { + EVT InVT = In.getValueType(); + assert((X86ISD::VSEXT == Opc || X86ISD::VZEXT == Opc) && "Unexpected opcode"); + + if (VT.is128BitVector() && InVT.is128BitVector()) + return X86ISD::VSEXT == Opc ? DAG.getSignExtendVectorInReg(In, DL, VT) + : DAG.getZeroExtendVectorInReg(In, DL, VT); + + // For 256-bit vectors, we only need the lower (128-bit) input half. + // For 512-bit vectors, we only need the lower input half or quarter. + if (VT.getSizeInBits() > 128 && InVT.getSizeInBits() > 128) { + int Scale = VT.getScalarSizeInBits() / InVT.getScalarSizeInBits(); + In = extractSubVector(In, 0, DAG, DL, + std::max(128, (int)VT.getSizeInBits() / Scale)); + } + + return DAG.getNode(Opc, DL, VT, In); +} + /// Generate unpacklo/unpackhi shuffle mask. static void createUnpackShuffleMask(MVT VT, SmallVectorImpl &Mask, bool Lo, bool Unary) { @@ -5170,9 +5254,10 @@ static const Constant *getTargetConstantFromNode(SDValue Op) { // Extract raw constant bits from constant pools. static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, - SmallBitVector &UndefElts, - SmallVectorImpl &EltBits) { - assert(UndefElts.empty() && "Expected an empty UndefElts vector"); + APInt &UndefElts, + SmallVectorImpl &EltBits, + bool AllowWholeUndefs = true, + bool AllowPartialUndefs = true) { assert(EltBits.empty() && "Expected an empty EltBits vector"); Op = peekThroughBitcasts(Op); @@ -5182,56 +5267,84 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, assert((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!"); unsigned NumElts = SizeInBits / EltSizeInBits; + unsigned SrcEltSizeInBits = VT.getScalarSizeInBits(); + unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits; + // Extract all the undef/constant element data and pack into single bitsets. APInt UndefBits(SizeInBits, 0); APInt MaskBits(SizeInBits, 0); // Split the undef/constant single bitset data into the target elements. auto SplitBitData = [&]() { - UndefElts = SmallBitVector(NumElts, false); + // Don't split if we don't allow undef bits. + bool AllowUndefs = AllowWholeUndefs || AllowPartialUndefs; + if (UndefBits.getBoolValue() && !AllowUndefs) + return false; + + UndefElts = APInt(NumElts, 0); EltBits.resize(NumElts, APInt(EltSizeInBits, 0)); for (unsigned i = 0; i != NumElts; ++i) { - APInt UndefEltBits = UndefBits.lshr(i * EltSizeInBits); - UndefEltBits = UndefEltBits.zextOrTrunc(EltSizeInBits); + unsigned BitOffset = i * EltSizeInBits; + APInt UndefEltBits = UndefBits.extractBits(EltSizeInBits, BitOffset); - // Only treat an element as UNDEF if all bits are UNDEF, otherwise - // treat it as zero. + // Only treat an element as UNDEF if all bits are UNDEF. 
if (UndefEltBits.isAllOnesValue()) { - UndefElts[i] = true; + if (!AllowWholeUndefs) + return false; + UndefElts.setBit(i); continue; } - APInt Bits = MaskBits.lshr(i * EltSizeInBits); - Bits = Bits.zextOrTrunc(EltSizeInBits); + // If only some bits are UNDEF then treat them as zero (or bail if not + // supported). + if (UndefEltBits.getBoolValue() && !AllowPartialUndefs) + return false; + + APInt Bits = MaskBits.extractBits(EltSizeInBits, BitOffset); EltBits[i] = Bits.getZExtValue(); } return true; }; - auto ExtractConstantBits = [SizeInBits](const Constant *Cst, APInt &Mask, - APInt &Undefs) { + // Collect constant bits and insert into mask/undef bit masks. + auto CollectConstantBits = [SizeInBits](const Constant *Cst, APInt &Mask, + APInt &Undefs, unsigned BitOffset) { if (!Cst) return false; unsigned CstSizeInBits = Cst->getType()->getPrimitiveSizeInBits(); if (isa(Cst)) { - Mask = APInt::getNullValue(SizeInBits); - Undefs = APInt::getLowBitsSet(SizeInBits, CstSizeInBits); + Undefs.setBits(BitOffset, BitOffset + CstSizeInBits); return true; } if (auto *CInt = dyn_cast(Cst)) { - Mask = CInt->getValue().zextOrTrunc(SizeInBits); - Undefs = APInt::getNullValue(SizeInBits); + Mask |= CInt->getValue().zextOrTrunc(SizeInBits).shl(BitOffset); return true; } if (auto *CFP = dyn_cast(Cst)) { - Mask = CFP->getValueAPF().bitcastToAPInt().zextOrTrunc(SizeInBits); - Undefs = APInt::getNullValue(SizeInBits); + APInt CstBits = CFP->getValueAPF().bitcastToAPInt(); + Mask |= CstBits.zextOrTrunc(SizeInBits).shl(BitOffset); return true; } return false; }; + // Extract constant bits from build vector. + if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) { + for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { + const SDValue &Src = Op.getOperand(i); + unsigned BitOffset = i * SrcEltSizeInBits; + if (Src.isUndef()) { + UndefBits.setBits(BitOffset, BitOffset + SrcEltSizeInBits); + continue; + } + auto *Cst = cast(Src); + APInt Bits = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits); + MaskBits |= Bits.zext(SizeInBits).shl(BitOffset); + } + return SplitBitData(); + } + // Extract constant bits from constant pool vector. if (auto *Cst = getTargetConstantFromNode(Op)) { Type *CstTy = Cst->getType(); @@ -5239,117 +5352,59 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, return false; unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits(); - for (unsigned i = 0, e = CstTy->getVectorNumElements(); i != e; ++i) { - APInt Bits, Undefs; - if (!ExtractConstantBits(Cst->getAggregateElement(i), Bits, Undefs)) + for (unsigned i = 0, e = CstTy->getVectorNumElements(); i != e; ++i) + if (!CollectConstantBits(Cst->getAggregateElement(i), MaskBits, UndefBits, + i * CstEltSizeInBits)) return false; - MaskBits |= Bits.shl(i * CstEltSizeInBits); - UndefBits |= Undefs.shl(i * CstEltSizeInBits); - } return SplitBitData(); } // Extract constant bits from a broadcasted constant pool scalar. 
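// Illustrative sketch, not part of the patch: the CollectConstantBits /
// SplitBitData scheme above packs per-element constants into one wide bit
// buffer at BitOffset = i * SrcEltSizeInBits, then re-slices that buffer at
// the requested element width. Standalone model with a 64-bit buffer
// (element widths below 64 assumed):
#include <cassert>
#include <cstdint>
#include <vector>

uint64_t packElts(const std::vector<uint64_t> &Elts, unsigned EltBits) {
  uint64_t Buf = 0;
  for (unsigned i = 0; i != Elts.size(); ++i)
    Buf |= (Elts[i] & ((1ull << EltBits) - 1)) << (i * EltBits);
  return Buf;
}

std::vector<uint64_t> splitBits(uint64_t Buf, unsigned N, unsigned EltBits) {
  std::vector<uint64_t> Out(N);
  for (unsigned i = 0; i != N; ++i)
    Out[i] = (Buf >> (i * EltBits)) & ((1ull << EltBits) - 1);
  return Out;
}

int main() {
  // Pack four 16-bit elements, then view the same bits as eight i8 elements.
  uint64_t Buf = packElts({0x1122, 0x3344, 0x5566, 0x7788}, 16);
  assert(splitBits(Buf, 8, 8)[1] == 0x11); // little-endian slicing
}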
if (Op.getOpcode() == X86ISD::VBROADCAST && - EltSizeInBits <= Op.getScalarValueSizeInBits()) { + EltSizeInBits <= SrcEltSizeInBits) { if (auto *Broadcast = getTargetConstantFromNode(Op.getOperand(0))) { - APInt Bits, Undefs; - if (ExtractConstantBits(Broadcast, Bits, Undefs)) { - unsigned NumBroadcastBits = Op.getScalarValueSizeInBits(); - unsigned NumBroadcastElts = SizeInBits / NumBroadcastBits; - for (unsigned i = 0; i != NumBroadcastElts; ++i) { - MaskBits |= Bits.shl(i * NumBroadcastBits); - UndefBits |= Undefs.shl(i * NumBroadcastBits); + APInt Bits(SizeInBits, 0); + APInt Undefs(SizeInBits, 0); + if (CollectConstantBits(Broadcast, Bits, Undefs, 0)) { + for (unsigned i = 0; i != NumSrcElts; ++i) { + MaskBits |= Bits.shl(i * SrcEltSizeInBits); + UndefBits |= Undefs.shl(i * SrcEltSizeInBits); } return SplitBitData(); } } } + // Extract a rematerialized scalar constant insertion. + if (Op.getOpcode() == X86ISD::VZEXT_MOVL && + Op.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR && + isa(Op.getOperand(0).getOperand(0))) { + auto *CN = cast(Op.getOperand(0).getOperand(0)); + MaskBits = CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits); + MaskBits = MaskBits.zext(SizeInBits); + return SplitBitData(); + } + return false; } -// TODO: Merge more of this with getTargetConstantBitsFromNode. static bool getTargetShuffleMaskIndices(SDValue MaskNode, unsigned MaskEltSizeInBits, SmallVectorImpl &RawMask) { - MaskNode = peekThroughBitcasts(MaskNode); - - MVT VT = MaskNode.getSimpleValueType(); - assert(VT.isVector() && "Can't produce a non-vector with a build_vector!"); - unsigned NumMaskElts = VT.getSizeInBits() / MaskEltSizeInBits; - - // Split an APInt element into MaskEltSizeInBits sized pieces and - // insert into the shuffle mask. - auto SplitElementToMask = [&](APInt Element) { - // Note that this is x86 and so always little endian: the low byte is - // the first byte of the mask. 
- int Split = VT.getScalarSizeInBits() / MaskEltSizeInBits; - for (int i = 0; i < Split; ++i) { - APInt RawElt = Element.getLoBits(MaskEltSizeInBits); - Element = Element.lshr(MaskEltSizeInBits); - RawMask.push_back(RawElt.getZExtValue()); - } - }; - - if (MaskNode.getOpcode() == X86ISD::VBROADCAST) { - // TODO: Handle (MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0 - // TODO: Handle (VT.getScalarSizeInBits() % MaskEltSizeInBits) == 0 - if (VT.getScalarSizeInBits() != MaskEltSizeInBits) - return false; - if (auto *CN = dyn_cast(MaskNode.getOperand(0))) { - const APInt &MaskElement = CN->getAPIntValue(); - for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { - APInt RawElt = MaskElement.getLoBits(MaskEltSizeInBits); - RawMask.push_back(RawElt.getZExtValue()); - } - } - return false; - } - - if (MaskNode.getOpcode() == X86ISD::VZEXT_MOVL && - MaskNode.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR) { - SDValue MaskOp = MaskNode.getOperand(0).getOperand(0); - if (auto *CN = dyn_cast(MaskOp)) { - if ((MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0) { - RawMask.push_back(CN->getZExtValue()); - RawMask.append(NumMaskElts - 1, 0); - return true; - } - - if ((VT.getScalarSizeInBits() % MaskEltSizeInBits) == 0) { - unsigned ElementSplit = VT.getScalarSizeInBits() / MaskEltSizeInBits; - SplitElementToMask(CN->getAPIntValue()); - RawMask.append((VT.getVectorNumElements() - 1) * ElementSplit, 0); - return true; - } - } - return false; - } - - if (MaskNode.getOpcode() != ISD::BUILD_VECTOR) - return false; - - // We can always decode if the buildvector is all zero constants, - // but can't use isBuildVectorAllZeros as it might contain UNDEFs. - if (all_of(MaskNode->ops(), X86::isZeroNode)) { - RawMask.append(NumMaskElts, 0); - return true; - } - - // TODO: Handle (MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0 - if ((VT.getScalarSizeInBits() % MaskEltSizeInBits) != 0) + APInt UndefElts; + SmallVector EltBits; + + // Extract the raw target constant bits. + // FIXME: We currently don't support UNDEF bits or mask entries. + if (!getTargetConstantBitsFromNode(MaskNode, MaskEltSizeInBits, UndefElts, + EltBits, /* AllowWholeUndefs */ false, + /* AllowPartialUndefs */ false)) return false; - for (SDValue Op : MaskNode->ops()) { - if (auto *CN = dyn_cast(Op.getNode())) - SplitElementToMask(CN->getAPIntValue()); - else if (auto *CFN = dyn_cast(Op.getNode())) - SplitElementToMask(CFN->getValueAPF().bitcastToAPInt()); - else - return false; - } + // Insert the extracted elements into the mask. + for (APInt Elt : EltBits) + RawMask.push_back(Elt.getZExtValue()); return true; } @@ -5376,6 +5431,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, case X86ISD::BLENDI: ImmN = N->getOperand(N->getNumOperands()-1); DecodeBLENDMask(VT, cast(ImmN)->getZExtValue(), Mask); + IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; case X86ISD::SHUFP: ImmN = N->getOperand(N->getNumOperands()-1); @@ -5444,8 +5500,18 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, IsUnary = true; break; case X86ISD::VBROADCAST: { - // We only decode broadcasts of same-sized vectors at the moment. - if (N->getOperand(0).getValueType() == VT) { + SDValue N0 = N->getOperand(0); + // See if we're broadcasting from index 0 of an EXTRACT_SUBVECTOR. If so, + // add the pre-extracted value to the Ops vector. 
+ if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && + N0.getOperand(0).getValueType() == VT && + N0.getConstantOperandVal(1) == 0) + Ops.push_back(N0.getOperand(0)); + + // We only decode broadcasts of same-sized vectors, unless the broadcast + // came from an extract from the original width. If we found one, we + // pushed it the Ops vector above. + if (N0.getValueType() == VT || !Ops.empty()) { DecodeVectorBroadcast(VT, Mask); IsUnary = true; break; @@ -5640,6 +5706,19 @@ static bool setTargetShuffleZeroElements(SDValue N, V1 = peekThroughBitcasts(V1); V2 = peekThroughBitcasts(V2); + assert((VT.getSizeInBits() % Mask.size()) == 0 && + "Illegal split of shuffle value type"); + unsigned EltSizeInBits = VT.getSizeInBits() / Mask.size(); + + // Extract known constant input data. + APInt UndefSrcElts[2]; + SmallVector SrcEltBits[2]; + bool IsSrcConstant[2] = { + getTargetConstantBitsFromNode(V1, EltSizeInBits, UndefSrcElts[0], + SrcEltBits[0], true, false), + getTargetConstantBitsFromNode(V2, EltSizeInBits, UndefSrcElts[1], + SrcEltBits[1], true, false)}; + for (int i = 0, Size = Mask.size(); i < Size; ++i) { int M = Mask[i]; @@ -5648,6 +5727,7 @@ static bool setTargetShuffleZeroElements(SDValue N, continue; // Determine shuffle input and normalize the mask. + unsigned SrcIdx = M / Size; SDValue V = M < Size ? V1 : V2; M %= Size; @@ -5657,39 +5737,27 @@ static bool setTargetShuffleZeroElements(SDValue N, continue; } - // Currently we can only search BUILD_VECTOR for UNDEF/ZERO elements. - if (V.getOpcode() != ISD::BUILD_VECTOR) - continue; - - // If the BUILD_VECTOR has fewer elements then the (larger) source - // element must be UNDEF/ZERO. - // TODO: Is it worth testing the individual bits of a constant? - if ((Size % V.getNumOperands()) == 0) { - int Scale = Size / V->getNumOperands(); - SDValue Op = V.getOperand(M / Scale); - if (Op.isUndef()) + // SCALAR_TO_VECTOR - only the first element is defined, and the rest UNDEF. + // TODO: We currently only set UNDEF for integer types - floats use the same + // registers as vectors and many of the scalar folded loads rely on the + // SCALAR_TO_VECTOR pattern. + if (V.getOpcode() == ISD::SCALAR_TO_VECTOR && + (Size % V.getValueType().getVectorNumElements()) == 0) { + int Scale = Size / V.getValueType().getVectorNumElements(); + int Idx = M / Scale; + if (Idx != 0 && !VT.isFloatingPoint()) Mask[i] = SM_SentinelUndef; - else if (X86::isZeroNode(Op)) + else if (Idx == 0 && X86::isZeroNode(V.getOperand(0))) Mask[i] = SM_SentinelZero; continue; } - // If the BUILD_VECTOR has more elements then all the (smaller) source - // elements must be all UNDEF or all ZERO. - if ((V.getNumOperands() % Size) == 0) { - int Scale = V->getNumOperands() / Size; - bool AllUndef = true; - bool AllZero = true; - for (int j = 0; j < Scale; ++j) { - SDValue Op = V.getOperand((M * Scale) + j); - AllUndef &= Op.isUndef(); - AllZero &= X86::isZeroNode(Op); - } - if (AllUndef) + // Attempt to extract from the source's constant bits. + if (IsSrcConstant[SrcIdx]) { + if (UndefSrcElts[SrcIdx][M]) Mask[i] = SM_SentinelUndef; - else if (AllZero) + else if (SrcEltBits[SrcIdx][M] == 0) Mask[i] = SM_SentinelZero; - continue; } } @@ -5715,11 +5783,16 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl &Mask, unsigned Opcode = N.getOpcode(); switch (Opcode) { - case ISD::AND: { + case ISD::AND: + case X86ISD::ANDNP: { // Attempt to decode as a per-byte mask. 
- SmallBitVector UndefElts; + APInt UndefElts; SmallVector EltBits; - if (!getTargetConstantBitsFromNode(N.getOperand(1), 8, UndefElts, EltBits)) + SDValue N0 = N.getOperand(0); + SDValue N1 = N.getOperand(1); + bool IsAndN = (X86ISD::ANDNP == Opcode); + uint64_t ZeroMask = IsAndN ? 255 : 0; + if (!getTargetConstantBitsFromNode(IsAndN ? N0 : N1, 8, UndefElts, EltBits)) return false; for (int i = 0, e = (int)EltBits.size(); i != e; ++i) { if (UndefElts[i]) { @@ -5729,9 +5802,55 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl &Mask, uint64_t ByteBits = EltBits[i].getZExtValue(); if (ByteBits != 0 && ByteBits != 255) return false; - Mask.push_back(ByteBits == 0 ? SM_SentinelZero : i); + Mask.push_back(ByteBits == ZeroMask ? SM_SentinelZero : i); } - Ops.push_back(N.getOperand(0)); + Ops.push_back(IsAndN ? N1 : N0); + return true; + } + case ISD::SCALAR_TO_VECTOR: { + // Match against a scalar_to_vector of an extract from a similar vector. + SDValue N0 = N.getOperand(0); + if (N0.getOpcode() != ISD::EXTRACT_VECTOR_ELT || + N0.getOperand(0).getValueType() != VT || + !isa(N0.getOperand(1)) || + NumElts <= N0.getConstantOperandVal(1) || + !N->isOnlyUserOf(N0.getNode())) + return false; + Ops.push_back(N0.getOperand(0)); + Mask.push_back(N0.getConstantOperandVal(1)); + Mask.append(NumElts - 1, SM_SentinelUndef); + return true; + } + case X86ISD::PINSRB: + case X86ISD::PINSRW: { + SDValue InVec = N.getOperand(0); + SDValue InScl = N.getOperand(1); + uint64_t InIdx = N.getConstantOperandVal(2); + assert(InIdx < NumElts && "Illegal insertion index"); + + // Attempt to recognise a PINSR*(VEC, 0, Idx) shuffle pattern. + if (X86::isZeroNode(InScl)) { + Ops.push_back(InVec); + for (unsigned i = 0; i != NumElts; ++i) + Mask.push_back(i == InIdx ? SM_SentinelZero : (int)i); + return true; + } + + // Attempt to recognise a PINSR*(ASSERTZEXT(PEXTR*)) shuffle pattern. + // TODO: Expand this to support INSERT_VECTOR_ELT/etc. + unsigned ExOp = + (X86ISD::PINSRB == Opcode ? X86ISD::PEXTRB : X86ISD::PEXTRW); + if (InScl.getOpcode() != ISD::AssertZext || + InScl.getOperand(0).getOpcode() != ExOp) + return false; + + SDValue ExVec = InScl.getOperand(0).getOperand(0); + uint64_t ExIdx = InScl.getOperand(0).getConstantOperandVal(1); + assert(ExIdx < NumElts && "Illegal extraction index"); + Ops.push_back(InVec); + Ops.push_back(ExVec); + for (unsigned i = 0; i != NumElts; ++i) + Mask.push_back(i == InIdx ? NumElts + ExIdx : i); return true; } case X86ISD::VSHLI: @@ -5766,6 +5885,7 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl &Mask, } return true; } + case ISD::ZERO_EXTEND_VECTOR_INREG: case X86ISD::VZEXT: { // TODO - add support for VPMOVZX with smaller input vector types. SDValue Src = N.getOperand(0); @@ -5781,36 +5901,38 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl &Mask, return false; } +/// Removes unused shuffle source inputs and adjusts the shuffle mask accordingly. 
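// Illustrative sketch, not part of the patch: the ISD::AND / X86ISD::ANDNP
// case above decodes a constant whose lanes are all-ones or all-zero into a
// shuffle mask: kept lanes become identity indices, cleared lanes become
// zero sentinels (ANDNP uses its mask operand inverted). Standalone model:
#include <cstdint>
#include <vector>

const int kSentinelZero = -2; // stands in for SM_SentinelZero

bool decodeByteMask(const std::vector<uint8_t> &MaskBytes, bool IsAndN,
                    std::vector<int> &ShuffleMask) {
  uint8_t ZeroVal = IsAndN ? 255 : 0;
  for (int i = 0, e = (int)MaskBytes.size(); i != e; ++i) {
    if (MaskBytes[i] != 0 && MaskBytes[i] != 255)
      return false; // not a per-byte select; give up
    ShuffleMask.push_back(MaskBytes[i] == ZeroVal ? kSentinelZero : i);
  }
  return true;
}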
+static void resolveTargetShuffleInputsAndMask(SmallVectorImpl &Inputs, + SmallVectorImpl &Mask) { + int MaskWidth = Mask.size(); + SmallVector UsedInputs; + for (int i = 0, e = Inputs.size(); i < e; ++i) { + int lo = UsedInputs.size() * MaskWidth; + int hi = lo + MaskWidth; + if (any_of(Mask, [lo, hi](int i) { return (lo <= i) && (i < hi); })) { + UsedInputs.push_back(Inputs[i]); + continue; + } + for (int &M : Mask) + if (lo <= M) + M -= MaskWidth; + } + Inputs = UsedInputs; +} + /// Calls setTargetShuffleZeroElements to resolve a target shuffle mask's inputs /// and set the SM_SentinelUndef and SM_SentinelZero values. Then check the /// remaining input indices in case we now have a unary shuffle and adjust the -/// Op0/Op1 inputs accordingly. +/// inputs accordingly. /// Returns true if the target shuffle mask was decoded. -static bool resolveTargetShuffleInputs(SDValue Op, SDValue &Op0, SDValue &Op1, +static bool resolveTargetShuffleInputs(SDValue Op, + SmallVectorImpl &Inputs, SmallVectorImpl &Mask) { - SmallVector Ops; - if (!setTargetShuffleZeroElements(Op, Mask, Ops)) - if (!getFauxShuffleMask(Op, Mask, Ops)) + if (!setTargetShuffleZeroElements(Op, Mask, Inputs)) + if (!getFauxShuffleMask(Op, Mask, Inputs)) return false; - int NumElts = Mask.size(); - bool Op0InUse = any_of(Mask, [NumElts](int Idx) { - return 0 <= Idx && Idx < NumElts; - }); - bool Op1InUse = any_of(Mask, [NumElts](int Idx) { return NumElts <= Idx; }); - - Op0 = Op0InUse ? Ops[0] : SDValue(); - Op1 = Op1InUse ? Ops[1] : SDValue(); - - // We're only using Op1 - commute the mask and inputs. - if (!Op0InUse && Op1InUse) { - for (int &M : Mask) - if (NumElts <= M) - M -= NumElts; - Op0 = Op1; - Op1 = SDValue(); - } - + resolveTargetShuffleInputsAndMask(Inputs, Mask); return true; } @@ -5885,10 +6007,9 @@ static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG, /// Custom lower build_vector of v16i8. static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, - unsigned NumNonZero, unsigned NumZero, - SelectionDAG &DAG, - const X86Subtarget &Subtarget, - const TargetLowering &TLI) { + unsigned NumNonZero, unsigned NumZero, + SelectionDAG &DAG, + const X86Subtarget &Subtarget) { if (NumNonZero > 8) return SDValue(); @@ -5899,18 +6020,26 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, // SSE4.1 - use PINSRB to insert each byte directly. if (Subtarget.hasSSE41()) { for (unsigned i = 0; i < 16; ++i) { - bool isNonZero = (NonZeros & (1 << i)) != 0; - if (isNonZero) { + bool IsNonZero = (NonZeros & (1 << i)) != 0; + if (IsNonZero) { + // If the build vector contains zeros or our first insertion is not the + // first index then insert into zero vector to break any register + // dependency else use SCALAR_TO_VECTOR/VZEXT_MOVL. 
if (First) { - if (NumZero) - V = getZeroVector(MVT::v16i8, Subtarget, DAG, dl); - else - V = DAG.getUNDEF(MVT::v16i8); First = false; + if (NumZero || 0 != i) + V = getZeroVector(MVT::v16i8, Subtarget, DAG, dl); + else { + assert(0 == i && "Expected insertion into zero-index"); + V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32); + V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V); + V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V); + V = DAG.getBitcast(MVT::v16i8, V); + continue; + } } - V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, - MVT::v16i8, V, Op.getOperand(i), - DAG.getIntPtrConstant(i, dl)); + V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v16i8, V, + Op.getOperand(i), DAG.getIntPtrConstant(i, dl)); } } @@ -5929,24 +6058,35 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, } if ((i & 1) != 0) { + // FIXME: Investigate extending to i32 instead of just i16. + // FIXME: Investigate combining the first 4 bytes as a i32 instead. SDValue ThisElt, LastElt; - bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0; + bool LastIsNonZero = (NonZeros & (1 << (i - 1))) != 0; if (LastIsNonZero) { - LastElt = DAG.getNode(ISD::ZERO_EXTEND, dl, - MVT::i16, Op.getOperand(i-1)); + LastElt = + DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i - 1)); } if (ThisIsNonZero) { ThisElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i)); - ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16, - ThisElt, DAG.getConstant(8, dl, MVT::i8)); + ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16, ThisElt, + DAG.getConstant(8, dl, MVT::i8)); if (LastIsNonZero) ThisElt = DAG.getNode(ISD::OR, dl, MVT::i16, ThisElt, LastElt); } else ThisElt = LastElt; - if (ThisElt.getNode()) - V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt, - DAG.getIntPtrConstant(i/2, dl)); + if (ThisElt) { + if (1 == i) { + V = NumZero ? DAG.getZExtOrTrunc(ThisElt, dl, MVT::i32) + : DAG.getAnyExtOrTrunc(ThisElt, dl, MVT::i32); + V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V); + V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V); + V = DAG.getBitcast(MVT::v8i16, V); + } else { + V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt, + DAG.getIntPtrConstant(i / 2, dl)); + } + } } } @@ -5957,8 +6097,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, unsigned NumNonZero, unsigned NumZero, SelectionDAG &DAG, - const X86Subtarget &Subtarget, - const TargetLowering &TLI) { + const X86Subtarget &Subtarget) { if (NumNonZero > 4) return SDValue(); @@ -5966,18 +6105,26 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, SDValue V; bool First = true; for (unsigned i = 0; i < 8; ++i) { - bool isNonZero = (NonZeros & (1 << i)) != 0; - if (isNonZero) { + bool IsNonZero = (NonZeros & (1 << i)) != 0; + if (IsNonZero) { + // If the build vector contains zeros or our first insertion is not the + // first index then insert into zero vector to break any register + // dependency else use SCALAR_TO_VECTOR/VZEXT_MOVL. 
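// Illustrative sketch, not part of the patch: the SSE2 path above halves the
// number of insertions by first combining each pair of adjacent bytes into
// one 16-bit lane; x86 is little-endian, so the even byte is the low half.
#include <cassert>
#include <cstdint>

uint16_t pairBytes(uint8_t Lo, bool LoNonZero, uint8_t Hi, bool HiNonZero) {
  uint16_t Elt = 0;
  if (LoNonZero)
    Elt |= Lo;                // zext(lo)
  if (HiNonZero)
    Elt |= (uint16_t)Hi << 8; // zext(hi) << 8, then OR the two halves
  return Elt;
}

int main() { assert(pairBytes(0x34, true, 0x12, true) == 0x1234); }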
if (First) { - if (NumZero) - V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl); - else - V = DAG.getUNDEF(MVT::v8i16); First = false; + if (NumZero || 0 != i) + V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl); + else { + assert(0 == i && "Expected insertion into zero-index"); + V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32); + V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V); + V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V); + V = DAG.getBitcast(MVT::v8i16, V); + continue; + } } - V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, - MVT::v8i16, V, Op.getOperand(i), - DAG.getIntPtrConstant(i, dl)); + V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, + Op.getOperand(i), DAG.getIntPtrConstant(i, dl)); } } @@ -5986,8 +6133,7 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, /// Custom lower build_vector of v4i32 or v4f32. static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG, - const X86Subtarget &Subtarget, - const TargetLowering &TLI) { + const X86Subtarget &Subtarget) { // Find all zeroable elements. std::bitset<4> Zeroable; for (int i=0; i < 4; ++i) { @@ -6183,7 +6329,7 @@ static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl, /// /// Example: -> zextload a static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef Elts, - SDLoc &DL, SelectionDAG &DAG, + const SDLoc &DL, SelectionDAG &DAG, bool isAfterLegalize) { unsigned NumElems = Elts.size(); @@ -6347,14 +6493,14 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef Elts, return SDValue(); } -static Constant *getConstantVector(MVT VT, APInt SplatValue, +static Constant *getConstantVector(MVT VT, const APInt &SplatValue, unsigned SplatBitSize, LLVMContext &C) { unsigned ScalarSize = VT.getScalarSizeInBits(); unsigned NumElm = SplatBitSize / ScalarSize; SmallVector ConstantVec; for (unsigned i = 0; i < NumElm; i++) { - APInt Val = SplatValue.lshr(ScalarSize * i).trunc(ScalarSize); + APInt Val = SplatValue.extractBits(ScalarSize, ScalarSize * i); Constant *Const; if (VT.isFloatingPoint()) { assert((ScalarSize == 32 || ScalarSize == 64) && @@ -6635,6 +6781,7 @@ static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) { SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0); SDValue ExtIdx = Op.getOperand(i).getOperand(1); + // Quit if non-constant index. if (!isa(ExtIdx)) return SDValue(); @@ -6665,11 +6812,10 @@ static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) { VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, Mask); - for (unsigned i = 0, e = InsertIndices.size(); i != e; ++i) { - unsigned Idx = InsertIndices[i]; + + for (unsigned Idx : InsertIndices) NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx), DAG.getIntPtrConstant(Idx, DL)); - } return NV; } @@ -6932,23 +7078,24 @@ static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1, return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI); } -/// Try to fold a build_vector that performs an 'addsub' to an X86ISD::ADDSUB -/// node. -static SDValue LowerToAddSub(const BuildVectorSDNode *BV, - const X86Subtarget &Subtarget, SelectionDAG &DAG) { +/// Returns true iff \p BV builds a vector with the result equivalent to +/// the result of ADDSUB operation. +/// If true is returned then the operands of ADDSUB = Opnd0 +- Opnd1 operation +/// are written to the parameters \p Opnd0 and \p Opnd1. 
+static bool isAddSub(const BuildVectorSDNode *BV,
+                     const X86Subtarget &Subtarget, SelectionDAG &DAG,
+                     SDValue &Opnd0, SDValue &Opnd1) {
+  MVT VT = BV->getSimpleValueType(0);
   if ((!Subtarget.hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
-      (!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)))
-    return SDValue();
+      (!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)) &&
+      (!Subtarget.hasAVX512() || (VT != MVT::v16f32 && VT != MVT::v8f64)))
+    return false;
 
-  SDLoc DL(BV);
   unsigned NumElts = VT.getVectorNumElements();
   SDValue InVec0 = DAG.getUNDEF(VT);
   SDValue InVec1 = DAG.getUNDEF(VT);
 
-  assert((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v4f32 ||
-          VT == MVT::v2f64) && "build_vector with an invalid type found!");
-
   // Odd-numbered elements in the input build vector are obtained from
   // adding two integer/float elements.
   // Even-numbered elements in the input build vector are obtained from
@@ -6970,7 +7117,7 @@ static SDValue LowerToAddSub(const BuildVectorSDNode *BV,
     // Early exit if we found an unexpected opcode.
     if (Opcode != ExpectedOpcode)
-      return SDValue();
+      return false;
 
     SDValue Op0 = Op.getOperand(0);
     SDValue Op1 = Op.getOperand(1);
@@ -6983,11 +7130,11 @@ static SDValue LowerToAddSub(const BuildVectorSDNode *BV,
         !isa<ConstantSDNode>(Op0.getOperand(1)) ||
         !isa<ConstantSDNode>(Op1.getOperand(1)) ||
         Op0.getOperand(1) != Op1.getOperand(1))
-      return SDValue();
+      return false;
 
     unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
     if (I0 != i)
-      return SDValue();
+      return false;
 
     // We found a valid add/sub node. Update the information accordingly.
     if (i & 1)
@@ -6999,39 +7146,118 @@ static SDValue LowerToAddSub(const BuildVectorSDNode *BV,
     if (InVec0.isUndef()) {
       InVec0 = Op0.getOperand(0);
       if (InVec0.getSimpleValueType() != VT)
-        return SDValue();
+        return false;
     }
     if (InVec1.isUndef()) {
       InVec1 = Op1.getOperand(0);
       if (InVec1.getSimpleValueType() != VT)
-        return SDValue();
+        return false;
     }
 
     // Make sure that operands in input to each add/sub node always
     // come from a same pair of vectors.
     if (InVec0 != Op0.getOperand(0)) {
       if (ExpectedOpcode == ISD::FSUB)
-        return SDValue();
+        return false;
 
       // FADD is commutable. Try to commute the operands
       // and then test again.
       std::swap(Op0, Op1);
       if (InVec0 != Op0.getOperand(0))
-        return SDValue();
+        return false;
     }
 
     if (InVec1 != Op1.getOperand(0))
-      return SDValue();
+      return false;
 
     // Update the pair of expected opcodes.
     std::swap(ExpectedOpcode, NextExpectedOpcode);
   }
 
   // Don't try to fold this build_vector into an ADDSUB if the inputs are undef.
-  if (AddFound && SubFound && !InVec0.isUndef() && !InVec1.isUndef())
-    return DAG.getNode(X86ISD::ADDSUB, DL, VT, InVec0, InVec1);
+  if (!AddFound || !SubFound || InVec0.isUndef() || InVec1.isUndef())
+    return false;
 
-  return SDValue();
+  Opnd0 = InVec0;
+  Opnd1 = InVec1;
+  return true;
+}
+
+/// Returns true if it is possible to fold MUL and an idiom that has already
+/// been recognized as ADDSUB(\p Opnd0, \p Opnd1) into FMADDSUB(x, y, \p Opnd1).
+/// If (and only if) true is returned, the operands of FMADDSUB are written to
+/// parameters \p Opnd0, \p Opnd1, \p Opnd2.
+///
+/// Prior to calling this function it should be known that there is some
+/// SDNode that potentially can be replaced with an X86ISD::ADDSUB operation
+/// using \p Opnd0 and \p Opnd1 as operands. Also, this method is called
+/// before replacement of such SDNode with ADDSUB operation. Thus the number
+/// of \p Opnd0 uses is expected to be equal to 2.
+/// For example, this function may be called for the following IR:
+///   %AB = fmul fast <2 x double> %A, %B
+///   %Sub = fsub fast <2 x double> %AB, %C
+///   %Add = fadd fast <2 x double> %AB, %C
+///   %Addsub = shufflevector <2 x double> %Sub, <2 x double> %Add,
+///                           <2 x i32> <i32 0, i32 3>
+/// There is a def for %Addsub here, which potentially can be replaced by
+/// X86ISD::ADDSUB operation:
+///   %Addsub = X86ISD::ADDSUB %AB, %C
+/// and such ADDSUB can further be replaced with FMADDSUB:
+///   %Addsub = FMADDSUB %A, %B, %C.
+///
+/// The main reason why this method is called before the replacement of the
+/// recognized ADDSUB idiom with ADDSUB operation is that such replacement
+/// is illegal sometimes. E.g. 512-bit ADDSUB is not available, while 512-bit
+/// FMADDSUB is.
+static bool isFMAddSub(const X86Subtarget &Subtarget, SelectionDAG &DAG,
+                       SDValue &Opnd0, SDValue &Opnd1, SDValue &Opnd2) {
+  if (Opnd0.getOpcode() != ISD::FMUL || Opnd0->use_size() != 2 ||
+      !Subtarget.hasAnyFMA())
+    return false;
+
+  // FIXME: These checks must match the similar ones in
+  // DAGCombiner::visitFADDForFMACombine. It would be good to have one
+  // function that would answer if it is Ok to fuse MUL + ADD to FMADD
+  // or MUL + ADDSUB to FMADDSUB.
+  const TargetOptions &Options = DAG.getTarget().Options;
+  bool AllowFusion =
+      (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
+  if (!AllowFusion)
+    return false;
+
+  Opnd2 = Opnd1;
+  Opnd1 = Opnd0.getOperand(1);
+  Opnd0 = Opnd0.getOperand(0);
+
+  return true;
+}
+
+/// Try to fold a build_vector that performs an 'addsub' or 'fmaddsub'
+/// operation into an X86ISD::ADDSUB or X86ISD::FMADDSUB node.
+static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV,
+                                       const X86Subtarget &Subtarget,
+                                       SelectionDAG &DAG) {
+  SDValue Opnd0, Opnd1;
+  if (!isAddSub(BV, Subtarget, DAG, Opnd0, Opnd1))
+    return SDValue();
+
+  MVT VT = BV->getSimpleValueType(0);
+  SDLoc DL(BV);
+
+  // Try to generate X86ISD::FMADDSUB node here.
+  SDValue Opnd2;
+  if (isFMAddSub(Subtarget, DAG, Opnd0, Opnd1, Opnd2))
+    return DAG.getNode(X86ISD::FMADDSUB, DL, VT, Opnd0, Opnd1, Opnd2);
+
+  // Do not generate X86ISD::ADDSUB node for 512-bit types even though
+  // the ADDSUB idiom has been successfully recognized. There are no known
+  // X86 targets with 512-bit ADDSUB instructions!
+  // 512-bit ADDSUB idiom recognition was needed only as part of FMADDSUB idiom
+  // recognition.
+  if (VT.is512BitVector())
+    return SDValue();
+
+  return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);
 }
 
 /// Lower BUILD_VECTOR to a horizontal add/sub operation if possible.
@@ -7260,7 +7486,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
     return VectorConstant;
 
   BuildVectorSDNode *BV = cast<BuildVectorSDNode>(Op.getNode());
-  if (SDValue AddSub = LowerToAddSub(BV, Subtarget, DAG))
+  if (SDValue AddSub = lowerToAddSubOrFMAddSub(BV, Subtarget, DAG))
     return AddSub;
   if (SDValue HorizontalOp = LowerToHorizontalOp(BV, Subtarget, DAG))
     return HorizontalOp;
@@ -7309,7 +7535,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
     // a constant pool load than it is to do a movd + shuffle.
     if (ExtVT == MVT::i64 && !Subtarget.is64Bit() &&
         (!IsAllConstants || Idx == 0)) {
-      if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) {
+      if (DAG.MaskedValueIsZero(Item, APInt::getHighBitsSet(64, 32))) {
         // Handle SSE only.
assert(VT == MVT::v2i64 && "Expected an SSE value type!"); MVT VecVT = MVT::v4i32; @@ -7452,17 +7678,17 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // If element VT is < 32 bits, convert it to inserts into a zero vector. if (EVTBits == 8 && NumElems == 16) if (SDValue V = LowerBuildVectorv16i8(Op, NonZeros, NumNonZero, NumZero, - DAG, Subtarget, *this)) + DAG, Subtarget)) return V; if (EVTBits == 16 && NumElems == 8) if (SDValue V = LowerBuildVectorv8i16(Op, NonZeros, NumNonZero, NumZero, - DAG, Subtarget, *this)) + DAG, Subtarget)) return V; // If element VT is == 32 bits and has 4 elems, try to generate an INSERTPS if (EVTBits == 32 && NumElems == 4) - if (SDValue V = LowerBuildVectorv4x32(Op, DAG, Subtarget, *this)) + if (SDValue V = LowerBuildVectorv4x32(Op, DAG, Subtarget)) return V; // If element VT is == 32 bits, turn it into a number of shuffles. @@ -7658,7 +7884,7 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, SDValue IdxVal = DAG.getIntPtrConstant(NumElems/2, dl); if (V1.isUndef()) - V2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V2, IdxVal); + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V2, IdxVal); if (IsZeroV1) return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, ZeroVec, V2, IdxVal); @@ -7779,25 +8005,61 @@ is256BitLaneRepeatedShuffleMask(MVT VT, ArrayRef Mask, return isRepeatedShuffleMask(256, VT, Mask, RepeatedMask); } -/// \brief Checks whether a shuffle mask is equivalent to an explicit list of -/// arguments. -/// -/// This is a fast way to test a shuffle mask against a fixed pattern: -/// -/// if (isShuffleEquivalent(Mask, 3, 2, {1, 0})) { ... } -/// -/// It returns true if the mask is exactly as wide as the argument list, and -/// each element of the mask is either -1 (signifying undef) or the value given -/// in the argument. -static bool isShuffleEquivalent(SDValue V1, SDValue V2, ArrayRef Mask, - ArrayRef ExpectedMask) { - if (Mask.size() != ExpectedMask.size()) - return false; - +/// Test whether a target shuffle mask is equivalent within each sub-lane. +/// Unlike isRepeatedShuffleMask we must respect SM_SentinelZero. +static bool isRepeatedTargetShuffleMask(unsigned LaneSizeInBits, MVT VT, + ArrayRef Mask, + SmallVectorImpl &RepeatedMask) { + int LaneSize = LaneSizeInBits / VT.getScalarSizeInBits(); + RepeatedMask.assign(LaneSize, SM_SentinelUndef); int Size = Mask.size(); - - // If the values are build vectors, we can look through them to find - // equivalent inputs that make the shuffles equivalent. + for (int i = 0; i < Size; ++i) { + assert(isUndefOrZero(Mask[i]) || (Mask[i] >= 0)); + if (Mask[i] == SM_SentinelUndef) + continue; + if (Mask[i] == SM_SentinelZero) { + if (!isUndefOrZero(RepeatedMask[i % LaneSize])) + return false; + RepeatedMask[i % LaneSize] = SM_SentinelZero; + continue; + } + if ((Mask[i] % Size) / LaneSize != i / LaneSize) + // This entry crosses lanes, so there is no way to model this shuffle. + return false; + + // Ok, handle the in-lane shuffles by detecting if and when they repeat. + // Adjust second vector indices to start at LaneSize instead of Size. + int LocalM = + Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize; + if (RepeatedMask[i % LaneSize] == SM_SentinelUndef) + // This is the first non-undef entry in this slot of a 128-bit lane. + RepeatedMask[i % LaneSize] = LocalM; + else if (RepeatedMask[i % LaneSize] != LocalM) + // Found a mismatch with the repeated mask. 
+      return false;
+  }
+  return true;
+}
+
+/// \brief Checks whether a shuffle mask is equivalent to an explicit list of
+/// arguments.
+///
+/// This is a fast way to test a shuffle mask against a fixed pattern:
+///
+///   if (isShuffleEquivalent(Mask, 3, 2, {1, 0})) { ... }
+///
+/// It returns true if the mask is exactly as wide as the argument list, and
+/// each element of the mask is either -1 (signifying undef) or the value given
+/// in the argument.
+static bool isShuffleEquivalent(SDValue V1, SDValue V2, ArrayRef<int> Mask,
+                                ArrayRef<int> ExpectedMask) {
+  if (Mask.size() != ExpectedMask.size())
+    return false;
+
+  int Size = Mask.size();
+
+  // If the values are build vectors, we can look through them to find
+  // equivalent inputs that make the shuffles equivalent.
   auto *BV1 = dyn_cast<BuildVectorSDNode>(V1);
   auto *BV2 = dyn_cast<BuildVectorSDNode>(V2);
@@ -7811,7 +8073,7 @@ static bool isShuffleEquivalent(SDValue V1, SDValue V2, ArrayRef<int> Mask,
             ExpectedBV->getOperand(ExpectedMask[i] % Size))
           return false;
       }
-}
+  }
 
   return true;
 }
@@ -7864,7 +8126,7 @@ static unsigned getV4X86ShuffleImm(ArrayRef<int> Mask) {
   return Imm;
 }
 
-static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, SDLoc DL,
+static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, const SDLoc &DL,
                                           SelectionDAG &DAG) {
   return DAG.getConstant(getV4X86ShuffleImm(Mask), DL, MVT::i8);
 }
@@ -7877,9 +8139,9 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, SDLoc DL,
 /// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle
 /// as many lanes with this technique as possible to simplify the remaining
 /// shuffle.
-static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
-                                                     SDValue V1, SDValue V2) {
-  SmallBitVector Zeroable(Mask.size(), false);
+static APInt computeZeroableShuffleElements(ArrayRef<int> Mask,
+                                            SDValue V1, SDValue V2) {
+  APInt Zeroable(Mask.size(), 0);
 
   V1 = peekThroughBitcasts(V1);
   V2 = peekThroughBitcasts(V2);
@@ -7894,7 +8156,7 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
     int M = Mask[i];
     // Handle the easy cases.
     if (M < 0 || (M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
-      Zeroable[i] = true;
+      Zeroable.setBit(i);
       continue;
     }
 
@@ -7912,17 +8174,19 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
       int Scale = Size / V->getNumOperands();
       SDValue Op = V.getOperand(M / Scale);
       if (Op.isUndef() || X86::isZeroNode(Op))
-        Zeroable[i] = true;
+        Zeroable.setBit(i);
       else if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
         APInt Val = Cst->getAPIntValue();
         Val = Val.lshr((M % Scale) * ScalarSizeInBits);
         Val = Val.getLoBits(ScalarSizeInBits);
-        Zeroable[i] = (Val == 0);
+        if (Val == 0)
+          Zeroable.setBit(i);
       } else if (ConstantFPSDNode *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
         APInt Val = Cst->getValueAPF().bitcastToAPInt();
         Val = Val.lshr((M % Scale) * ScalarSizeInBits);
         Val = Val.getLoBits(ScalarSizeInBits);
-        Zeroable[i] = (Val == 0);
+        if (Val == 0)
+          Zeroable.setBit(i);
       }
       continue;
     }
@@ -7936,7 +8200,8 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
         SDValue Op = V.getOperand((M * Scale) + j);
         AllZeroable &= (Op.isUndef() || X86::isZeroNode(Op));
       }
-      Zeroable[i] = AllZeroable;
+      if (AllZeroable)
+        Zeroable.setBit(i);
      continue;
     }
   }
@@ -7944,11 +8209,43 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
   return Zeroable;
 }
 
+// The shuffle result has the form:
+//   0*a[0] 0*a[1] ... 0*a[n], n >= 0, where the a[] elements appear in
+//   ascending order.
+// Each element of Zeroable corresponds to one element of Mask, as described
+// in the computeZeroableShuffleElements function.
+//
+// The function looks for a sub-mask whose nonzero elements appear in
+// increasing order. If such a sub-mask exists, the function returns true.
+static bool isNonZeroElementsInOrder(const APInt &Zeroable,
+                                     ArrayRef<int> Mask, const EVT &VectorType,
+                                     bool &IsZeroSideLeft) {
+  int NextElement = -1;
+  // Check if the Mask's nonzero elements are in increasing order.
+  for (int i = 0, e = Mask.size(); i < e; i++) {
+    // Check that the mask's zero elements are built only from zeros.
+    assert(Mask[i] >= -1 && "Out of bound mask element!");
+    if (Mask[i] < 0)
+      return false;
+    if (Zeroable[i])
+      continue;
+    // Find the lowest nonzero element.
+    if (NextElement < 0) {
+      NextElement = Mask[i] != 0 ? VectorType.getVectorNumElements() : 0;
+      IsZeroSideLeft = NextElement != 0;
+    }
+    // Exit if the mask's nonzero elements are not in increasing order.
+    if (NextElement != Mask[i])
+      return false;
+    NextElement++;
+  }
+  return true;
+}
+
 /// Try to lower a shuffle with a single PSHUFB of V1 or V2.
 static SDValue lowerVectorShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
                                             ArrayRef<int> Mask, SDValue V1,
                                             SDValue V2,
-                                            const SmallBitVector &Zeroable,
+                                            const APInt &Zeroable,
                                             const X86Subtarget &Subtarget,
                                             SelectionDAG &DAG) {
   int Size = Mask.size();
@@ -7999,6 +8296,121 @@ static SDValue lowerVectorShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
                      DAG.getBuildVector(I8VT, DL, PSHUFBMask)));
 }
 
+static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
+                           const X86Subtarget &Subtarget, SelectionDAG &DAG,
+                           const SDLoc &dl);
+
+// X86 has a dedicated shuffle that can be lowered to VEXPAND.
+static SDValue lowerVectorShuffleToEXPAND(const SDLoc &DL, MVT VT,
+                                          const APInt &Zeroable,
+                                          ArrayRef<int> Mask, SDValue &V1,
+                                          SDValue &V2, SelectionDAG &DAG,
+                                          const X86Subtarget &Subtarget) {
+  bool IsLeftZeroSide = true;
+  if (!isNonZeroElementsInOrder(Zeroable, Mask, V1.getValueType(),
+                                IsLeftZeroSide))
+    return SDValue();
+  unsigned VEXPANDMask = (~Zeroable).getZExtValue();
+  MVT IntegerType =
+      MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
+  SDValue MaskNode = DAG.getConstant(VEXPANDMask, DL, IntegerType);
+  unsigned NumElts = VT.getVectorNumElements();
+  assert((NumElts == 4 || NumElts == 8 || NumElts == 16) &&
+         "Unexpected number of vector elements");
+  SDValue VMask = getMaskNode(MaskNode, MVT::getVectorVT(MVT::i1, NumElts),
+                              Subtarget, DAG, DL);
+  SDValue ZeroVector = getZeroVector(VT, Subtarget, DAG, DL);
+  SDValue ExpandedVector = IsLeftZeroSide ? V2 : V1;
+  return DAG.getNode(ISD::VSELECT, DL, VT, VMask,
+                     DAG.getNode(X86ISD::EXPAND, DL, VT, ExpandedVector),
+                     ZeroVector);
+}
+
+static bool matchVectorShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2,
+                                        unsigned &UnpackOpcode, bool IsUnary,
+                                        ArrayRef<int> TargetMask, SDLoc &DL,
+                                        SelectionDAG &DAG,
+                                        const X86Subtarget &Subtarget) {
+  int NumElts = VT.getVectorNumElements();
+
+  bool Undef1 = true, Undef2 = true, Zero1 = true, Zero2 = true;
+  for (int i = 0; i != NumElts; i += 2) {
+    int M1 = TargetMask[i + 0];
+    int M2 = TargetMask[i + 1];
+    Undef1 &= (SM_SentinelUndef == M1);
+    Undef2 &= (SM_SentinelUndef == M2);
+    Zero1 &= isUndefOrZero(M1);
+    Zero2 &= isUndefOrZero(M2);
+  }
+  assert(!((Undef1 || Zero1) && (Undef2 || Zero2)) &&
+         "Zeroable shuffle detected");
+
+  // Attempt to match the target mask against the unpack lo/hi mask patterns.
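// [Standalone illustration, not part of the patch] For the VEXPAND lowering
// above: the execution mask is simply the complement of the zeroable lanes,
// and VEXPAND then writes consecutive source elements into the enabled
// lanes. E.g. for v4i32 with Zeroable = 0b0101, VEXPANDMask = ~Zeroable =
// 0b1010, so source elements {7, 9} expand to <0, 7, 0, 9> and the VSELECT
// zeroes lanes 0 and 2.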
+ SmallVector Unpckl, Unpckh; + createUnpackShuffleMask(VT, Unpckl, /* Lo = */ true, IsUnary); + if (isTargetShuffleEquivalent(TargetMask, Unpckl)) { + UnpackOpcode = X86ISD::UNPCKL; + V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2)); + V1 = (Undef1 ? DAG.getUNDEF(VT) : V1); + return true; + } + + createUnpackShuffleMask(VT, Unpckh, /* Lo = */ false, IsUnary); + if (isTargetShuffleEquivalent(TargetMask, Unpckh)) { + UnpackOpcode = X86ISD::UNPCKH; + V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2)); + V1 = (Undef1 ? DAG.getUNDEF(VT) : V1); + return true; + } + + // If an unary shuffle, attempt to match as an unpack lo/hi with zero. + if (IsUnary && (Zero1 || Zero2)) { + // Don't bother if we can blend instead. + if ((Subtarget.hasSSE41() || VT == MVT::v2i64 || VT == MVT::v2f64) && + isSequentialOrUndefOrZeroInRange(TargetMask, 0, NumElts, 0)) + return false; + + bool MatchLo = true, MatchHi = true; + for (int i = 0; (i != NumElts) && (MatchLo || MatchHi); ++i) { + int M = TargetMask[i]; + + // Ignore if the input is known to be zero or the index is undef. + if ((((i & 1) == 0) && Zero1) || (((i & 1) == 1) && Zero2) || + (M == SM_SentinelUndef)) + continue; + + MatchLo &= (M == Unpckl[i]); + MatchHi &= (M == Unpckh[i]); + } + + if (MatchLo || MatchHi) { + UnpackOpcode = MatchLo ? X86ISD::UNPCKL : X86ISD::UNPCKH; + V2 = Zero2 ? getZeroVector(VT, Subtarget, DAG, DL) : V1; + V1 = Zero1 ? getZeroVector(VT, Subtarget, DAG, DL) : V1; + return true; + } + } + + // If a binary shuffle, commute and try again. + if (!IsUnary) { + ShuffleVectorSDNode::commuteMask(Unpckl); + if (isTargetShuffleEquivalent(TargetMask, Unpckl)) { + UnpackOpcode = X86ISD::UNPCKL; + std::swap(V1, V2); + return true; + } + + ShuffleVectorSDNode::commuteMask(Unpckh); + if (isTargetShuffleEquivalent(TargetMask, Unpckh)) { + UnpackOpcode = X86ISD::UNPCKH; + std::swap(V1, V2); + return true; + } + } + + return false; +} + // X86 has dedicated unpack instructions that can handle specific blend // operations: UNPCKH and UNPCKL. static SDValue lowerVectorShuffleWithUNPCK(const SDLoc &DL, MVT VT, @@ -8032,13 +8444,12 @@ static SDValue lowerVectorShuffleWithUNPCK(const SDLoc &DL, MVT VT, /// one of the inputs being zeroable. 
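The reference patterns the matcher compares against can be sketched in isolation. This simplified model builds the lo/hi interleave mask for a single 128-bit lane; the real createUnpackShuffleMask repeats the pattern per lane for wider vectors:

#include <cassert>
#include <vector>

static std::vector<int> unpackMask(int NumElts, bool Lo, bool Unary) {
  std::vector<int> Mask;
  int Half = NumElts / 2;
  for (int i = 0; i < Half; ++i) {
    int Pos = i + (Lo ? 0 : Half);               // Source element in V1.
    Mask.push_back(Pos);                         // Even lanes come from V1.
    Mask.push_back(Pos + (Unary ? 0 : NumElts)); // Odd lanes from V2 (or V1).
  }
  return Mask;
}

int main() {
  // v4i32 UNPCKL interleaves the low halves of V1 and V2.
  assert((unpackMask(4, /*Lo=*/true, /*Unary=*/false) ==
          std::vector<int>{0, 4, 1, 5}));
  // A unary UNPCKH duplicates the high half of a single input.
  assert((unpackMask(4, /*Lo=*/false, /*Unary=*/true) ==
          std::vector<int>{2, 2, 3, 3}));
  return 0;
}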
static SDValue lowerVectorShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, - const SmallBitVector &Zeroable, + const APInt &Zeroable, SelectionDAG &DAG) { assert(!VT.isFloatingPoint() && "Floating point types are not supported"); MVT EltVT = VT.getVectorElementType(); SDValue Zero = DAG.getConstant(0, DL, EltVT); - SDValue AllOnes = - DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, EltVT); + SDValue AllOnes = DAG.getAllOnesConstant(DL, EltVT); SmallVector VMaskOps(Mask.size(), Zero); SDValue V; for (int i = 0, Size = Mask.size(); i < Size; ++i) { @@ -8070,10 +8481,8 @@ static SDValue lowerVectorShuffleAsBitBlend(const SDLoc &DL, MVT VT, SDValue V1, SelectionDAG &DAG) { assert(VT.isInteger() && "Only supports integer vector types!"); MVT EltVT = VT.getVectorElementType(); - int NumEltBits = EltVT.getSizeInBits(); SDValue Zero = DAG.getConstant(0, DL, EltVT); - SDValue AllOnes = DAG.getConstant(APInt::getAllOnesValue(NumEltBits), DL, - EltVT); + SDValue AllOnes = DAG.getAllOnesConstant(DL, EltVT); SmallVector MaskOps; for (int i = 0, Size = Mask.size(); i < Size; ++i) { if (Mask[i] >= 0 && Mask[i] != i && Mask[i] != i + Size) @@ -8091,6 +8500,11 @@ static SDValue lowerVectorShuffleAsBitBlend(const SDLoc &DL, MVT VT, SDValue V1, return DAG.getNode(ISD::OR, DL, VT, V1, V2); } +static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, + SDValue PreservedSrc, + const X86Subtarget &Subtarget, + SelectionDAG &DAG); + /// \brief Try to emit a blend instruction for a shuffle. /// /// This doesn't do any checks for the availability of instructions for blending @@ -8099,7 +8513,7 @@ static SDValue lowerVectorShuffleAsBitBlend(const SDLoc &DL, MVT VT, SDValue V1, /// that the shuffle mask is a blend, or convertible into a blend with zero. static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Original, - const SmallBitVector &Zeroable, + const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode()); @@ -8110,7 +8524,7 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, // Attempt to generate the binary blend mask. If an input is zero then // we can use any lane. // TODO: generalize the zero matching to any scalar like isShuffleEquivalent. 
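A loose scalar model of the bit-mask idea above, assuming a shuffle whose non-identity lanes are all zeroable; the vector types and DAG plumbing are elided:

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  std::vector<uint32_t> V = {10, 20, 30, 40};
  std::vector<int> Mask = {0, -1, 2, -1};  // -1 lanes are known-zero here.
  std::vector<uint32_t> AndMask(4), Out(4);
  // Lanes that keep their input element get all-ones; zeroable lanes get 0.
  for (int i = 0; i < 4; ++i)
    AndMask[i] = Mask[i] == i ? ~0u : 0u;
  // A single AND then finishes the whole shuffle.
  for (int i = 0; i < 4; ++i)
    Out[i] = V[i] & AndMask[i];
  assert((Out == std::vector<uint32_t>{10, 0, 30, 0}));
  return 0;
}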
- unsigned BlendMask = 0; + uint64_t BlendMask = 0; for (int i = 0, Size = Mask.size(); i < Size; ++i) { int M = Mask[i]; if (M < 0) @@ -8118,7 +8532,7 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, if (M == i) continue; if (M == i + Size) { - BlendMask |= 1u << i; + BlendMask |= 1ull << i; continue; } if (Zeroable[i]) { @@ -8129,7 +8543,7 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, } if (V2IsZero) { ForceV2Zero = true; - BlendMask |= 1u << i; + BlendMask |= 1ull << i; Mask[i] = i + Size; continue; } @@ -8143,12 +8557,11 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, if (ForceV2Zero) V2 = getZeroVector(VT, Subtarget, DAG, DL); - auto ScaleBlendMask = [](unsigned BlendMask, int Size, int Scale) { - unsigned ScaledMask = 0; + auto ScaleBlendMask = [](uint64_t BlendMask, int Size, int Scale) { + uint64_t ScaledMask = 0; for (int i = 0; i != Size; ++i) - if (BlendMask & (1u << i)) - for (int j = 0; j != Scale; ++j) - ScaledMask |= 1u << (i * Scale + j); + if (BlendMask & (1ull << i)) + ScaledMask |= ((1ull << Scale) - 1) << (i * Scale); return ScaledMask; }; @@ -8201,7 +8614,7 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, BlendMask = 0; for (int i = 0; i < 8; ++i) if (RepeatedMask[i] >= 8) - BlendMask |= 1u << i; + BlendMask |= 1ull << i; return DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2, DAG.getConstant(BlendMask, DL, MVT::i8)); } @@ -8212,6 +8625,13 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, assert((VT.is128BitVector() || Subtarget.hasAVX2()) && "256-bit byte-blends require AVX2 support!"); + if (Subtarget.hasBWI() && Subtarget.hasVLX()) { + MVT IntegerType = + MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8)); + SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType); + return getVectorMaskingNode(V2, MaskNode, V1, Subtarget, DAG); + } + // Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB. 
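The closed form used by the updated ScaleBlendMask above can be checked against the old per-bit loop in isolation:

#include <cassert>
#include <cstdint>

// Old form: expand each set bit of the blend mask bit by bit.
static uint64_t scaleLoop(uint64_t Mask, int Size, int Scale) {
  uint64_t Scaled = 0;
  for (int i = 0; i != Size; ++i)
    if (Mask & (1ull << i))
      for (int j = 0; j != Scale; ++j)
        Scaled |= 1ull << (i * Scale + j);
  return Scaled;
}

// New form: one shifted run of ones per set bit.
static uint64_t scaleClosed(uint64_t Mask, int Size, int Scale) {
  uint64_t Scaled = 0;
  for (int i = 0; i != Size; ++i)
    if (Mask & (1ull << i))
      Scaled |= ((1ull << Scale) - 1) << (i * Scale);
  return Scaled;
}

int main() {
  for (uint64_t M = 0; M < 16; ++M)
    assert(scaleLoop(M, 4, 2) == scaleClosed(M, 4, 2) &&
           scaleLoop(M, 4, 4) == scaleClosed(M, 4, 4));
  return 0;
}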
if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, DAG)) @@ -8249,7 +8669,17 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, VT, DAG.getNode(ISD::VSELECT, DL, BlendVT, DAG.getBuildVector(BlendVT, DL, VSELECTMask), V1, V2)); } - + case MVT::v16f32: + case MVT::v8f64: + case MVT::v8i64: + case MVT::v16i32: + case MVT::v32i16: + case MVT::v64i8: { + MVT IntegerType = + MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8)); + SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType); + return getVectorMaskingNode(V2, MaskNode, V1, Subtarget, DAG); + } default: llvm_unreachable("Not a supported integer vector type!"); } @@ -8541,7 +8971,7 @@ static SDValue lowerVectorShuffleAsRotate(const SDLoc &DL, MVT VT, static int matchVectorShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, unsigned ScalarSizeInBits, ArrayRef Mask, int MaskOffset, - const SmallBitVector &Zeroable, + const APInt &Zeroable, const X86Subtarget &Subtarget) { int Size = Mask.size(); unsigned SizeInBits = Size * ScalarSizeInBits; @@ -8603,7 +9033,7 @@ static int matchVectorShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, static SDValue lowerVectorShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, - const SmallBitVector &Zeroable, + const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { int Size = Mask.size(); @@ -8639,12 +9069,12 @@ static SDValue lowerVectorShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1, /// \brief Try to lower a vector shuffle using SSE4a EXTRQ/INSERTQ. static SDValue lowerVectorShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, - const SmallBitVector &Zeroable, + const APInt &Zeroable, SelectionDAG &DAG) { int Size = Mask.size(); int HalfSize = Size / 2; assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size"); - assert(!Zeroable.all() && "Fully zeroable shuffle mask"); + assert(!Zeroable.isAllOnesValue() && "Fully zeroable shuffle mask"); // Upper half must be undefined. if (!isUndefInRange(Mask, HalfSize, HalfSize)) @@ -8818,14 +9248,7 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend( MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale), NumElements / Scale); InputV = ShuffleOffset(InputV); - - // For 256-bit vectors, we only need the lower (128-bit) input half. - // For 512-bit vectors, we only need the lower input half or quarter. - if (VT.getSizeInBits() > 128) - InputV = extractSubVector(InputV, 0, DAG, DL, - std::max(128, (int)VT.getSizeInBits() / Scale)); - - InputV = DAG.getNode(X86ISD::VZEXT, DL, ExtVT, InputV); + InputV = getExtendInVec(X86ISD::VZEXT, DL, ExtVT, InputV, DAG); return DAG.getBitcast(VT, InputV); } @@ -8942,7 +9365,7 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend( /// are both incredibly common and often quite performance sensitive. static SDValue lowerVectorShuffleAsZeroOrAnyExtend( const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, - const SmallBitVector &Zeroable, const X86Subtarget &Subtarget, + const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { int Bits = VT.getSizeInBits(); int NumLanes = Bits / 128; @@ -9098,7 +9521,7 @@ static bool isShuffleFoldableLoad(SDValue V) { /// across all subtarget feature sets. 
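A simplified standalone sketch of the shift matching above, using -1 to stand for a zeroable lane and checking only the left-shift direction of a single input:

#include <cassert>
#include <vector>

// Returns the shift amount if Mask is "shift every element up by S, zeros
// move in at the bottom", otherwise -1.
static int matchShiftLeft(const std::vector<int> &Mask) {
  int N = (int)Mask.size();
  for (int S = 1; S < N; ++S) {
    bool Ok = true;
    for (int i = 0; i < N && Ok; ++i) {
      if (i < S)
        Ok = Mask[i] < 0;                      // Low lanes must be zeroable.
      else
        Ok = Mask[i] < 0 || Mask[i] == i - S;  // High lanes shift up by S.
    }
    if (Ok)
      return S;
  }
  return -1;
}

int main() {
  assert(matchShiftLeft({-1, 0, 1, 2}) == 1);  // A VSHLDQ-style pattern.
  assert(matchShiftLeft({0, 1, 2, 3}) == -1);  // Identity is not a shift.
  return 0;
}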
static SDValue lowerVectorShuffleAsElementInsertion( const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, - const SmallBitVector &Zeroable, const X86Subtarget &Subtarget, + const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT ExtVT = VT; MVT EltVT = VT.getVectorElementType(); @@ -9396,7 +9819,16 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT, if (((BroadcastIdx * EltSize) % 128) != 0) return SDValue(); - MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 128 / EltSize); + // The shuffle input might have been a bitcast we looked through; look at + // the original input vector. Emit an EXTRACT_SUBVECTOR of that type; we'll + // later bitcast it to BroadcastVT. + MVT SrcVT = V.getSimpleValueType(); + assert(SrcVT.getScalarSizeInBits() == BroadcastVT.getScalarSizeInBits() && + "Unexpected vector element size"); + assert(SrcVT.getVectorNumElements() == BroadcastVT.getVectorNumElements() && + "Unexpected vector num elements"); + + MVT ExtVT = MVT::getVectorVT(SrcVT.getScalarType(), 128 / EltSize); V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtVT, V, DAG.getIntPtrConstant(BroadcastIdx, DL)); } @@ -9426,6 +9858,12 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT, BroadcastVT = MVT::getVectorVT(MVT::f64, NumBroadcastElts); } + // We only support broadcasting from 128-bit vectors to minimize the + // number of patterns we need to deal with in isel. So extract down to + // 128-bits. + if (SrcVT.getSizeInBits() > 128) + V = extract128BitVector(V, 0, DAG, DL); + return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, BroadcastVT, V)); } @@ -9437,7 +9875,7 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT, // elements are zeroable. static bool matchVectorShuffleAsInsertPS(SDValue &V1, SDValue &V2, unsigned &InsertPSMask, - const SmallBitVector &Zeroable, + const APInt &Zeroable, ArrayRef Mask, SelectionDAG &DAG) { assert(V1.getSimpleValueType().is128BitVector() && "Bad operand type!"); @@ -9526,7 +9964,7 @@ static bool matchVectorShuffleAsInsertPS(SDValue &V1, SDValue &V2, static SDValue lowerVectorShuffleAsInsertPS(const SDLoc &DL, SDValue V1, SDValue V2, ArrayRef Mask, - const SmallBitVector &Zeroable, + const APInt &Zeroable, SelectionDAG &DAG) { assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!"); @@ -9661,7 +10099,7 @@ static SDValue lowerVectorShuffleAsPermuteAndUnpack(const SDLoc &DL, MVT VT, /// it is better to avoid lowering through this for integer vectors where /// possible. static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef Mask, - const SmallBitVector &Zeroable, + const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { @@ -9743,7 +10181,7 @@ static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef Mask, /// it falls back to the floating point shuffle operation with appropriate bit /// casting. static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef Mask, - const SmallBitVector &Zeroable, + const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { @@ -9962,7 +10400,7 @@ static SDValue lowerVectorShuffleWithSHUFPS(const SDLoc &DL, MVT VT, /// domain crossing penalties, as these are sufficient to implement all v4f32 /// shuffles. 
static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef Mask, - const SmallBitVector &Zeroable, + const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { @@ -10045,7 +10483,7 @@ static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef Mask, /// We try to handle these with integer-domain shuffles where we can, but for /// blends we use the floating point domain blend instructions. static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, - const SmallBitVector &Zeroable, + const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { @@ -10119,26 +10557,31 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG)) return Rotate; - // If we have direct support for blends, we should lower by decomposing into - // a permute. That will be faster than the domain cross. - if (IsBlendSupported) - return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i32, V1, V2, - Mask, DAG); - - // Try to lower by permuting the inputs into an unpack instruction. - if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(DL, MVT::v4i32, V1, - V2, Mask, DAG)) - return Unpack; + // Assume that a single SHUFPS is faster than an alternative sequence of + // multiple instructions (even if the CPU has a domain penalty). + // If some CPU is harmed by the domain switch, we can fix it in a later pass. + if (!isSingleSHUFPSMask(Mask)) { + // If we have direct support for blends, we should lower by decomposing into + // a permute. That will be faster than the domain cross. + if (IsBlendSupported) + return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i32, V1, V2, + Mask, DAG); + + // Try to lower by permuting the inputs into an unpack instruction. + if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack( + DL, MVT::v4i32, V1, V2, Mask, DAG)) + return Unpack; + } // We implement this with SHUFPS because it can blend from two vectors. // Because we're going to eventually use SHUFPS, we use SHUFPS even to build // up the inputs, bypassing domain shift penalties that we would encur if we // directly used PSHUFD on Nehalem and older. For newer chips, this isn't // relevant. 
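For reference, SHUFPS and PSHUFD take an 8-bit immediate encoding two bits per destination lane. A standalone model of the getV4X86ShuffleImm helper these paths rely on, with undef lanes defaulted to 0 here (the real helper picks smarter defaults):

#include <cassert>
#include <vector>

static unsigned v4ShuffleImm(const std::vector<int> &Mask) {
  unsigned Imm = 0;
  for (int i = 0; i < 4; ++i)
    Imm |= (Mask[i] < 0 ? 0 : (Mask[i] & 3)) << (i * 2);  // 2 bits per lane.
  return Imm;
}

int main() {
  assert(v4ShuffleImm({3, 2, 1, 0}) == 0x1B);  // Reverse: 0b00011011.
  assert(v4ShuffleImm({0, 1, 2, 3}) == 0xE4);  // Identity: 0b11100100.
  return 0;
}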
- return DAG.getBitcast( - MVT::v4i32, - DAG.getVectorShuffle(MVT::v4f32, DL, DAG.getBitcast(MVT::v4f32, V1), - DAG.getBitcast(MVT::v4f32, V2), Mask)); + SDValue CastV1 = DAG.getBitcast(MVT::v4f32, V1); + SDValue CastV2 = DAG.getBitcast(MVT::v4f32, V2); + SDValue ShufPS = DAG.getVectorShuffle(MVT::v4f32, DL, CastV1, CastV2, Mask); + return DAG.getBitcast(MVT::v4i32, ShufPS); } /// \brief Lowering of single-input v8i16 shuffles is the cornerstone of SSE2 @@ -10168,13 +10611,11 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle( MutableArrayRef HiMask = Mask.slice(4, 4); SmallVector LoInputs; - std::copy_if(LoMask.begin(), LoMask.end(), std::back_inserter(LoInputs), - [](int M) { return M >= 0; }); + copy_if(LoMask, std::back_inserter(LoInputs), [](int M) { return M >= 0; }); std::sort(LoInputs.begin(), LoInputs.end()); LoInputs.erase(std::unique(LoInputs.begin(), LoInputs.end()), LoInputs.end()); SmallVector HiInputs; - std::copy_if(HiMask.begin(), HiMask.end(), std::back_inserter(HiInputs), - [](int M) { return M >= 0; }); + copy_if(HiMask, std::back_inserter(HiInputs), [](int M) { return M >= 0; }); std::sort(HiInputs.begin(), HiInputs.end()); HiInputs.erase(std::unique(HiInputs.begin(), HiInputs.end()), HiInputs.end()); int NumLToL = @@ -10353,7 +10794,7 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle( }; if ((NumLToL == 3 && NumHToL == 1) || (NumLToL == 1 && NumHToL == 3)) return balanceSides(LToLInputs, HToLInputs, HToHInputs, LToHInputs, 0, 4); - else if ((NumHToH == 3 && NumLToH == 1) || (NumHToH == 1 && NumLToH == 3)) + if ((NumHToH == 3 && NumLToH == 1) || (NumHToH == 1 && NumLToH == 3)) return balanceSides(HToHInputs, LToHInputs, LToLInputs, HToLInputs, 4, 0); // At this point there are at most two inputs to the low and high halves from @@ -10609,7 +11050,7 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle( /// blend if only one input is used. static SDValue lowerVectorShuffleAsBlendOfPSHUFBs( const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, - const SmallBitVector &Zeroable, SelectionDAG &DAG, bool &V1InUse, + const APInt &Zeroable, SelectionDAG &DAG, bool &V1InUse, bool &V2InUse) { SDValue V1Mask[16]; SDValue V2Mask[16]; @@ -10670,7 +11111,7 @@ static SDValue lowerVectorShuffleAsBlendOfPSHUFBs( /// halves of the inputs separately (making them have relatively few inputs) /// and then concatenate them. static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef Mask, - const SmallBitVector &Zeroable, + const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { @@ -10854,7 +11295,7 @@ static int canLowerByDroppingEvenElements(ArrayRef Mask, /// the existing lowering for v8i16 blends on each half, finally PACK-ing them /// back together. 
static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, - const SmallBitVector &Zeroable, + const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { @@ -10911,14 +11352,13 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, if (!canWidenViaDuplication(Mask)) return SDValue(); SmallVector LoInputs; - std::copy_if(Mask.begin(), Mask.end(), std::back_inserter(LoInputs), - [](int M) { return M >= 0 && M < 8; }); + copy_if(Mask, std::back_inserter(LoInputs), + [](int M) { return M >= 0 && M < 8; }); std::sort(LoInputs.begin(), LoInputs.end()); LoInputs.erase(std::unique(LoInputs.begin(), LoInputs.end()), LoInputs.end()); SmallVector HiInputs; - std::copy_if(Mask.begin(), Mask.end(), std::back_inserter(HiInputs), - [](int M) { return M >= 8; }); + copy_if(Mask, std::back_inserter(HiInputs), [](int M) { return M >= 8; }); std::sort(HiInputs.begin(), HiInputs.end()); HiInputs.erase(std::unique(HiInputs.begin(), HiInputs.end()), HiInputs.end()); @@ -11144,7 +11584,7 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, /// dispatches to the lowering routines accordingly. static SDValue lower128BitVectorShuffle(const SDLoc &DL, ArrayRef Mask, MVT VT, SDValue V1, SDValue V2, - const SmallBitVector &Zeroable, + const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { switch (VT.SimpleTy) { @@ -11400,9 +11840,13 @@ static SDValue lowerVectorShuffleAsLanePermuteAndBlend(const SDLoc &DL, MVT VT, /// \brief Handle lowering 2-lane 128-bit shuffles. static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, - const SmallBitVector &Zeroable, + const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { + SmallVector WidenedMask; + if (!canWidenShuffleElements(Mask, WidenedMask)) + return SDValue(); + // TODO: If minimizing size and one of the inputs is a zero vector and the // the zero vector has only one use, we could use a VPERM2X128 to save the // instruction bytes needed to explicitly generate the zero vector. @@ -11450,15 +11894,10 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1, // [6] - ignore // [7] - zero high half of destination - int MaskLO = Mask[0]; - if (MaskLO == SM_SentinelUndef) - MaskLO = Mask[1] == SM_SentinelUndef ? 0 : Mask[1]; + int MaskLO = WidenedMask[0] < 0 ? 0 : WidenedMask[0]; + int MaskHI = WidenedMask[1] < 0 ? 0 : WidenedMask[1]; - int MaskHI = Mask[2]; - if (MaskHI == SM_SentinelUndef) - MaskHI = Mask[3] == SM_SentinelUndef ? 0 : Mask[3]; - - unsigned PermMask = MaskLO / 2 | (MaskHI / 2) << 4; + unsigned PermMask = MaskLO | (MaskHI << 4); // If either input is a zero vector, replace it with an undef input. // Shuffle mask values < 4 are selecting elements of V1. @@ -11467,16 +11906,16 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1, // selecting the zero vector and setting the zero mask bit. 
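The widening step that gates this lowering can be modeled in isolation. This simplified version handles undef (-1) but omits the zero sentinel that the real canWidenShuffleElements also tracks:

#include <cassert>
#include <vector>

// Widens a shuffle mask by pairing adjacent elements; fails when a pair
// straddles two different wide elements.
static bool widenShuffleMask(const std::vector<int> &Mask,
                             std::vector<int> &Widened) {
  Widened.clear();
  for (std::size_t i = 0; i + 1 < Mask.size(); i += 2) {
    int M0 = Mask[i], M1 = Mask[i + 1];
    if (M0 < 0 && M1 < 0)
      Widened.push_back(-1);               // Both undef: widened undef.
    else if (M0 >= 0 && M0 % 2 == 0 && (M1 < 0 || M1 == M0 + 1))
      Widened.push_back(M0 / 2);           // Aligned even/odd pair.
    else if (M0 < 0 && M1 >= 0 && M1 % 2 == 1)
      Widened.push_back(M1 / 2);           // Odd half alone still pins it.
    else
      return false;
  }
  return true;
}

int main() {
  std::vector<int> W;
  assert((widenShuffleMask({4, 5, 0, 1}, W) && W == std::vector<int>{2, 0}));
  assert(!widenShuffleMask({1, 2, 4, 5}, W));  // {1,2} is not an aligned pair.
  return 0;
}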
if (IsV1Zero) { V1 = DAG.getUNDEF(VT); - if (MaskLO < 4) + if (MaskLO < 2) PermMask = (PermMask & 0xf0) | 0x08; - if (MaskHI < 4) + if (MaskHI < 2) PermMask = (PermMask & 0x0f) | 0x80; } if (IsV2Zero) { V2 = DAG.getUNDEF(VT); - if (MaskLO >= 4) + if (MaskLO >= 2) PermMask = (PermMask & 0xf0) | 0x08; - if (MaskHI >= 4) + if (MaskHI >= 2) PermMask = (PermMask & 0x0f) | 0x80; } @@ -11871,7 +12310,7 @@ static bool matchVectorShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2, unsigned &ShuffleImm, ArrayRef Mask) { int NumElts = VT.getVectorNumElements(); - assert(VT.getScalarType() == MVT::f64 && + assert(VT.getScalarSizeInBits() == 64 && (NumElts == 2 || NumElts == 4 || NumElts == 8) && "Unexpected data type for VSHUFPD"); @@ -11907,6 +12346,9 @@ static bool matchVectorShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2, static SDValue lowerVectorShuffleWithSHUFPD(const SDLoc &DL, MVT VT, ArrayRef Mask, SDValue V1, SDValue V2, SelectionDAG &DAG) { + assert((VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v8f64)&& + "Unexpected data type for VSHUFPD"); + unsigned Immediate = 0; if (!matchVectorShuffleWithSHUFPD(VT, V1, V2, Immediate, Mask)) return SDValue(); @@ -11933,7 +12375,7 @@ static SDValue lowerVectorShuffleWithPERMV(const SDLoc &DL, MVT VT, /// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2 /// isn't available. static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef Mask, - const SmallBitVector &Zeroable, + const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { @@ -11941,11 +12383,9 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef Mask, assert(V2.getSimpleValueType() == MVT::v4f64 && "Bad operand type!"); assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!"); - SmallVector WidenedMask; - if (canWidenShuffleElements(Mask, WidenedMask)) - if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask, - Zeroable, Subtarget, DAG)) - return V; + if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask, + Zeroable, Subtarget, DAG)) + return V; if (V2.isUndef()) { // Check for being able to broadcast a single element. @@ -12011,6 +12451,11 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef Mask, if (SDValue Result = lowerVectorShuffleByMerging128BitLanes( DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG)) return Result; + // If we have VLX support, we can use VEXPAND. + if (Subtarget.hasVLX()) + if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v4f64, Zeroable, Mask, + V1, V2, DAG, Subtarget)) + return V; // If we have AVX2 then we always want to lower with a blend because an v4 we // can fully permute the elements. @@ -12027,7 +12472,7 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef Mask, /// This routine is only called when we have AVX2 and thus a reasonable /// instruction set for v4i64 shuffling.. 
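A standalone model of the VPERM2X128 control byte assembled above: bits [1:0] and [5:4] select the 128-bit source half for the low and high destination halves, and bits 3 and 7 zero a half:

#include <cassert>

// MaskLO/MaskHI are widened (128-bit chunk) selectors in [0,3], or -1 for
// undef; ZeroLO/ZeroHI request a zeroed destination half via bits 3 and 7.
static unsigned perm2x128Imm(int MaskLO, int MaskHI, bool ZeroLO, bool ZeroHI) {
  unsigned Imm = (MaskLO < 0 ? 0 : MaskLO) | ((MaskHI < 0 ? 0 : MaskHI) << 4);
  if (ZeroLO)
    Imm = (Imm & 0xf0) | 0x08;
  if (ZeroHI)
    Imm = (Imm & 0x0f) | 0x80;
  return Imm;
}

int main() {
  // Swap the two halves of V1.
  assert(perm2x128Imm(1, 0, false, false) == 0x01);
  // Low half of V2 into the low destination half, zero the high half.
  assert(perm2x128Imm(2, -1, false, true) == 0x82);
  return 0;
}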
static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef Mask, - const SmallBitVector &Zeroable, + const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { @@ -12036,11 +12481,9 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef Mask, assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!"); assert(Subtarget.hasAVX2() && "We can only lower v4i64 with AVX2!"); - SmallVector WidenedMask; - if (canWidenShuffleElements(Mask, WidenedMask)) - if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask, - Zeroable, Subtarget, DAG)) - return V; + if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask, + Zeroable, Subtarget, DAG)) + return V; if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask, Zeroable, Subtarget, DAG)) @@ -12076,12 +12519,17 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef Mask, Zeroable, Subtarget, DAG)) return Shift; - // If we have VLX support, we can use VALIGN. - if (Subtarget.hasVLX()) + // If we have VLX support, we can use VALIGN or VEXPAND. + if (Subtarget.hasVLX()) { if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG)) return Rotate; + if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v4i64, Zeroable, Mask, + V1, V2, DAG, Subtarget)) + return V; + } + // Try to use PALIGNR. if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG)) @@ -12112,7 +12560,7 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef Mask, /// Also ends up handling lowering of 8-lane 32-bit integer shuffles when AVX2 /// isn't available. static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef Mask, - const SmallBitVector &Zeroable, + const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { @@ -12182,6 +12630,11 @@ static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef Mask, if (SDValue Result = lowerVectorShuffleByMerging128BitLanes( DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG)) return Result; + // If we have VLX support, we can use VEXPAND. + if (Subtarget.hasVLX()) + if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8f32, Zeroable, Mask, + V1, V2, DAG, Subtarget)) + return V; // If we have AVX2 then we always want to lower with a blend because at v8 we // can fully permute the elements. @@ -12198,7 +12651,7 @@ static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef Mask, /// This routine is only called when we have AVX2 and thus a reasonable /// instruction set for v8i32 shuffling.. static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, - const SmallBitVector &Zeroable, + const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { @@ -12227,7 +12680,9 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, // efficient instructions that mirror the shuffles across the two 128-bit // lanes. 
SmallVector RepeatedMask; - if (is128BitLaneRepeatedShuffleMask(MVT::v8i32, Mask, RepeatedMask)) { + bool Is128BitLaneRepeatedShuffle = + is128BitLaneRepeatedShuffleMask(MVT::v8i32, Mask, RepeatedMask); + if (Is128BitLaneRepeatedShuffle) { assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!"); if (V2.isUndef()) return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32, V1, @@ -12244,12 +12699,17 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, Zeroable, Subtarget, DAG)) return Shift; - // If we have VLX support, we can use VALIGN. - if (Subtarget.hasVLX()) + // If we have VLX support, we can use VALIGN or EXPAND. + if (Subtarget.hasVLX()) { if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG)) return Rotate; + if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8i32, Zeroable, Mask, + V1, V2, DAG, Subtarget)) + return V; + } + // Try to use byte rotation instructions. if (SDValue Rotate = lowerVectorShuffleAsByteRotate( DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG)) @@ -12268,6 +12728,17 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8i32, VPermMask, V1); } + // Assume that a single SHUFPS is faster than an alternative sequence of + // multiple instructions (even if the CPU has a domain penalty). + // If some CPU is harmed by the domain switch, we can fix it in a later pass. + if (Is128BitLaneRepeatedShuffle && isSingleSHUFPSMask(RepeatedMask)) { + SDValue CastV1 = DAG.getBitcast(MVT::v8f32, V1); + SDValue CastV2 = DAG.getBitcast(MVT::v8f32, V2); + SDValue ShufPS = lowerVectorShuffleWithSHUFPS(DL, MVT::v8f32, RepeatedMask, + CastV1, CastV2, DAG); + return DAG.getBitcast(MVT::v8i32, ShufPS); + } + // Try to simplify this by merging 128-bit lanes to enable a lane-based // shuffle. if (SDValue Result = lowerVectorShuffleByMerging128BitLanes( @@ -12284,7 +12755,7 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, /// This routine is only called when we have AVX2 and thus a reasonable /// instruction set for v16i16 shuffling.. static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef Mask, - const SmallBitVector &Zeroable, + const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { @@ -12370,7 +12841,7 @@ static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef Mask, /// This routine is only called when we have AVX2 and thus a reasonable /// instruction set for v32i8 shuffling.. static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, - const SmallBitVector &Zeroable, + const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { @@ -12443,7 +12914,7 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, /// together based on the available instructions. static SDValue lower256BitVectorShuffle(const SDLoc &DL, ArrayRef Mask, MVT VT, SDValue V1, SDValue V2, - const SmallBitVector &Zeroable, + const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { // If we have a single input to the zero element, insert that into V1 if we @@ -12521,33 +12992,72 @@ static SDValue lowerV4X128VectorShuffle(const SDLoc &DL, MVT VT, if (!canWidenShuffleElements(Mask, WidenedMask)) return SDValue(); + // Check for patterns which can be matched with a single insert of a 256-bit + // subvector. 
+  bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask,
+                                        {0, 1, 2, 3, 0, 1, 2, 3});
+  if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask,
+                                        {0, 1, 2, 3, 8, 9, 10, 11})) {
+    MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 4);
+    SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
+                              DAG.getIntPtrConstant(0, DL));
+    SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
+                              OnlyUsesV1 ? V1 : V2,
+                              DAG.getIntPtrConstant(0, DL));
+    return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
+  }
+
+  assert(WidenedMask.size() == 4);
+
+  // See if this is an insertion of the lower 128 bits of V2 into V1.
+  bool IsInsert = true;
+  int V2Index = -1;
+  for (int i = 0; i < 4; ++i) {
+    assert(WidenedMask[i] >= -1);
+    if (WidenedMask[i] < 0)
+      continue;
+
+    // Make sure all V1 subvectors are in place.
+    if (WidenedMask[i] < 4) {
+      if (WidenedMask[i] != i) {
+        IsInsert = false;
+        break;
+      }
+    } else {
+      // Make sure we only have a single V2 index and it's the lowest 128 bits.
+      if (V2Index >= 0 || WidenedMask[i] != 4) {
+        IsInsert = false;
+        break;
+      }
+      V2Index = i;
+    }
+  }
+  if (IsInsert && V2Index >= 0) {
+    MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2);
+    SDValue Subvec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V2,
+                                 DAG.getIntPtrConstant(0, DL));
+    return insert128BitVector(V1, Subvec, V2Index * 2, DAG, DL);
+  }
+
+  // Try to lower to vshuf64x2/vshuf32x4.
   SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};
+  unsigned PermMask = 0;
   // Ensure elements came from the same Op.
-  int MaxOp1Index = VT.getVectorNumElements()/2 - 1;
-  for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) {
-    if (WidenedMask[i] == SM_SentinelZero)
-      return SDValue();
-    if (WidenedMask[i] == SM_SentinelUndef)
+  for (int i = 0; i < 4; ++i) {
+    assert(WidenedMask[i] >= -1);
+    if (WidenedMask[i] < 0)
       continue;

-    SDValue Op = WidenedMask[i] > MaxOp1Index ? V2 : V1;
-    unsigned OpIndex = (i < Size/2) ? 0 : 1;
+    SDValue Op = WidenedMask[i] >= 4 ? V2 : V1;
+    unsigned OpIndex = i / 2;
     if (Ops[OpIndex].isUndef())
       Ops[OpIndex] = Op;
     else if (Ops[OpIndex] != Op)
       return SDValue();
-  }
-
-  // Form a 128-bit permutation.
-  // Convert the 64-bit shuffle mask selection values into 128-bit selection
-  // bits defined by a vshuf64x2 instruction's immediate control byte.
-  unsigned PermMask = 0, Imm = 0;
-  unsigned ControlBitsNum = WidenedMask.size() / 2;
-  for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) {
-    // Use first element in place of undef mask.
-    Imm = (WidenedMask[i] == SM_SentinelUndef) ? 0 : WidenedMask[i];
-    PermMask |= (Imm % WidenedMask.size()) << (i * ControlBitsNum);
+    // Convert the 128-bit shuffle mask selection values into 128-bit selection
+    // bits defined by a vshuf64x2 instruction's immediate control byte.
+    PermMask |= (WidenedMask[i] % 4) << (i * 2);
   }

   return DAG.getNode(X86ISD::SHUF128, DL, VT, Ops[0], Ops[1],
@@ -12556,6 +13066,7 @@ static SDValue lowerV4X128VectorShuffle(const SDLoc &DL, MVT VT,

 /// \brief Handle lowering of 8-lane 64-bit floating point shuffles.
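A standalone model of the vshuf64x2/vshuf32x4 control byte assembled above: two bits per 128-bit destination chunk, with undef chunks defaulting to 0:

#include <cassert>
#include <vector>

static unsigned shuf128Imm(const std::vector<int> &WidenedMask) {
  unsigned Imm = 0;
  for (int i = 0; i < 4; ++i)
    if (WidenedMask[i] >= 0)
      Imm |= (WidenedMask[i] % 4) << (i * 2);  // 2 bits per 128-bit chunk.
  return Imm;
}

int main() {
  assert(shuf128Imm({0, 1, 2, 3}) == 0xE4);  // Identity placement of V1.
  assert(shuf128Imm({0, 1, 4, 5}) == 0x44);  // High half taken from V2's low.
  return 0;
}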
static SDValue lowerV8F64VectorShuffle(const SDLoc &DL, ArrayRef Mask, + const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { @@ -12598,11 +13109,20 @@ static SDValue lowerV8F64VectorShuffle(const SDLoc &DL, ArrayRef Mask, lowerVectorShuffleWithSHUFPD(DL, MVT::v8f64, Mask, V1, V2, DAG)) return Op; + if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8f64, Zeroable, Mask, V1, + V2, DAG, Subtarget)) + return V; + + if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f64, V1, V2, Mask, + Zeroable, Subtarget, DAG)) + return Blend; + return lowerVectorShuffleWithPERMV(DL, MVT::v8f64, Mask, V1, V2, DAG); } /// \brief Handle lowering of 16-lane 32-bit floating point shuffles. -static SDValue lowerV16F32VectorShuffle(SDLoc DL, ArrayRef Mask, +static SDValue lowerV16F32VectorShuffle(const SDLoc &DL, ArrayRef Mask, + const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { @@ -12631,16 +13151,24 @@ static SDValue lowerV16F32VectorShuffle(SDLoc DL, ArrayRef Mask, lowerVectorShuffleWithUNPCK(DL, MVT::v16f32, Mask, V1, V2, DAG)) return Unpck; + if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16f32, V1, V2, Mask, + Zeroable, Subtarget, DAG)) + return Blend; + // Otherwise, fall back to a SHUFPS sequence. return lowerVectorShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask, V1, V2, DAG); } + // If we have AVX512F support, we can use VEXPAND. + if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v16f32, Zeroable, Mask, + V1, V2, DAG, Subtarget)) + return V; return lowerVectorShuffleWithPERMV(DL, MVT::v16f32, Mask, V1, V2, DAG); } /// \brief Handle lowering of 8-lane 64-bit integer shuffles. static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef Mask, - const SmallBitVector &Zeroable, + const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { @@ -12691,13 +13219,21 @@ static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef Mask, if (SDValue Unpck = lowerVectorShuffleWithUNPCK(DL, MVT::v8i64, Mask, V1, V2, DAG)) return Unpck; + // If we have AVX512F support, we can use VEXPAND. + if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8i64, Zeroable, Mask, V1, + V2, DAG, Subtarget)) + return V; + + if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i64, V1, V2, Mask, + Zeroable, Subtarget, DAG)) + return Blend; return lowerVectorShuffleWithPERMV(DL, MVT::v8i64, Mask, V1, V2, DAG); } /// \brief Handle lowering of 16-lane 32-bit integer shuffles. static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, - const SmallBitVector &Zeroable, + const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { @@ -12716,7 +13252,9 @@ static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, // efficient instructions that mirror the shuffles across the four 128-bit // lanes. SmallVector RepeatedMask; - if (is128BitLaneRepeatedShuffleMask(MVT::v16i32, Mask, RepeatedMask)) { + bool Is128BitLaneRepeatedShuffle = + is128BitLaneRepeatedShuffleMask(MVT::v16i32, Mask, RepeatedMask); + if (Is128BitLaneRepeatedShuffle) { assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!"); if (V2.isUndef()) return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v16i32, V1, @@ -12744,12 +13282,29 @@ static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, DL, MVT::v16i32, V1, V2, Mask, Subtarget, DAG)) return Rotate; + // Assume that a single SHUFPS is faster than using a permv shuffle. 
+ // If some CPU is harmed by the domain switch, we can fix it in a later pass. + if (Is128BitLaneRepeatedShuffle && isSingleSHUFPSMask(RepeatedMask)) { + SDValue CastV1 = DAG.getBitcast(MVT::v16f32, V1); + SDValue CastV2 = DAG.getBitcast(MVT::v16f32, V2); + SDValue ShufPS = lowerVectorShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask, + CastV1, CastV2, DAG); + return DAG.getBitcast(MVT::v16i32, ShufPS); + } + // If we have AVX512F support, we can use VEXPAND. + if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v16i32, Zeroable, Mask, + V1, V2, DAG, Subtarget)) + return V; + + if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i32, V1, V2, Mask, + Zeroable, Subtarget, DAG)) + return Blend; return lowerVectorShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, DAG); } /// \brief Handle lowering of 32-lane 16-bit integer shuffles. static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef Mask, - const SmallBitVector &Zeroable, + const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { @@ -12791,12 +13346,16 @@ static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef Mask, } } + if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v32i16, V1, V2, Mask, + Zeroable, Subtarget, DAG)) + return Blend; + return lowerVectorShuffleWithPERMV(DL, MVT::v32i16, Mask, V1, V2, DAG); } /// \brief Handle lowering of 64-lane 8-bit integer shuffles. static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, - const SmallBitVector &Zeroable, + const APInt &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { @@ -12835,6 +13394,16 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, if (Subtarget.hasVBMI()) return lowerVectorShuffleWithPERMV(DL, MVT::v64i8, Mask, V1, V2, DAG); + // Try to create an in-lane repeating shuffle mask and then shuffle the + // the results into the target lanes. + if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute( + DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG)) + return V; + + if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v64i8, V1, V2, Mask, + Zeroable, Subtarget, DAG)) + return Blend; + // FIXME: Implement direct support for this type! return splitAndLowerVectorShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG); } @@ -12846,7 +13415,7 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, /// together based on the available instructions. static SDValue lower512BitVectorShuffle(const SDLoc &DL, ArrayRef Mask, MVT VT, SDValue V1, SDValue V2, - const SmallBitVector &Zeroable, + const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(Subtarget.hasAVX512() && @@ -12873,9 +13442,9 @@ static SDValue lower512BitVectorShuffle(const SDLoc &DL, ArrayRef Mask, // the requisite ISA extensions for that element type are available. 
switch (VT.SimpleTy) { case MVT::v8f64: - return lowerV8F64VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG); + return lowerV8F64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v16f32: - return lowerV16F32VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG); + return lowerV16F32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v8i64: return lowerV8I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v16i32: @@ -12955,10 +13524,10 @@ static SDValue lower1BitVectorShuffle(const SDLoc &DL, ArrayRef Mask, static bool canonicalizeShuffleMaskWithCommute(ArrayRef Mask) { int NumElements = Mask.size(); - int NumV1Elements = 0, NumV2Elements = 0, NumSentinelElements = 0; + int NumV1Elements = 0, NumV2Elements = 0; for (int M : Mask) if (M < 0) - ++NumSentinelElements; + continue; else if (M < NumElements) ++NumV1Elements; else @@ -13068,8 +13637,8 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget, // We actually see shuffles that are entirely re-arrangements of a set of // zero inputs. This mostly happens while decomposing complex shuffles into // simple ones. Directly lower these as a buildvector of zeros. - SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2); - if (Zeroable.all()) + APInt Zeroable = computeZeroableShuffleElements(Mask, V1, V2); + if (Zeroable.isAllOnesValue()) return getZeroVector(VT, Subtarget, DAG, DL); // Try to collapse shuffles into using a vector type with fewer elements but @@ -13245,10 +13814,14 @@ X86TargetLowering::ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const "Unexpected vector type in ExtractBitFromMaskVector"); // variable index can't be handled in mask registers, - // extend vector to VR512 + // extend vector to VR512/128 if (!isa(Idx)) { - MVT ExtVT = (VecVT == MVT::v8i1 ? MVT::v8i64 : MVT::v16i32); - SDValue Ext = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, Vec); + unsigned NumElts = VecVT.getVectorNumElements(); + // Extending v8i1/v16i1 to 512-bit get better performance on KNL + // than extending to 128/256bit. + unsigned VecSize = (NumElts <= 4 ? 
128 : 512);
+    MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(VecSize/NumElts), NumElts);
+    SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVT, Vec);
     SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
                               ExtVT.getVectorElementType(), Ext, Idx);
     return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
@@ -13266,9 +13839,9 @@ X86TargetLowering::ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const
   }
   unsigned MaxSift = VecVT.getVectorNumElements() - 1;
   if (MaxSift - IdxVal)
-    Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec,
+    Vec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, Vec,
                       DAG.getConstant(MaxSift - IdxVal, dl, MVT::i8));
-  Vec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, Vec,
+  Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
                     DAG.getConstant(MaxSift, dl, MVT::i8));
   return DAG.getNode(X86ISD::VEXTRACT, dl, MVT::i1, Vec,
                      DAG.getIntPtrConstant(0, dl));
@@ -13286,24 +13859,36 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
     return ExtractBitFromMaskVector(Op, DAG);

   if (!isa<ConstantSDNode>(Idx)) {
-    if (VecVT.is512BitVector() ||
-        (VecVT.is256BitVector() && Subtarget.hasInt256() &&
-         VecVT.getScalarSizeInBits() == 32)) {
-
-      MVT MaskEltVT =
-        MVT::getIntegerVT(VecVT.getScalarSizeInBits());
-      MVT MaskVT = MVT::getVectorVT(MaskEltVT, VecVT.getSizeInBits() /
-                                    MaskEltVT.getSizeInBits());
+    // It's more profitable to go through memory (1 cycle throughput)
+    // than to use a VMOVD + VPERMV/PSHUFB sequence (2/3 cycles throughput).
+    // The IACA tool was used to get the performance estimate
+    // (https://software.intel.com/en-us/articles/intel-architecture-code-analyzer)
+    //
+    // example: extractelement <16 x i8> %a, i32 %i
+    //
+    // Block Throughput: 3.00 Cycles
+    // Throughput Bottleneck: Port5
+    //
+    // | Num Of |  Ports pressure in cycles   |    |
+    // |  Uops  | 0 - DV |  5  |  6  |  7  |       |
+    // ---------------------------------------------
+    // |   1    |        | 1.0 |     |     | CP | vmovd xmm1, edi
+    // |   1    |        | 1.0 |     |     | CP | vpshufb xmm0, xmm0, xmm1
+    // |   2    |  1.0   | 1.0 |     |     | CP | vpextrb eax, xmm0, 0x0
+    // Total Num Of Uops: 4
+    //
+    //
+    // Block Throughput: 1.00 Cycles
+    // Throughput Bottleneck: PORT2_AGU, PORT3_AGU, Port4
+    //
+    // |    |    Ports pressure in cycles     |    |
+    // |Uops|  1  | 2 - D  | 3 - D  | 4 | 5  |     |
+    // ---------------------------------------------------------
+    // |2^  |     | 0.5    | 0.5    |1.0|    | CP | vmovaps xmmword ptr [rsp-0x18], xmm0
+    // |1   | 0.5 |        |        |   |0.5 |    | lea rax, ptr [rsp-0x18]
+    // |1   |     |0.5, 0.5|0.5, 0.5|   |    | CP | mov al, byte ptr [rdi+rax*1]
+    // Total Num Of Uops: 4

-      Idx = DAG.getZExtOrTrunc(Idx, dl, MaskEltVT);
-      auto PtrVT = getPointerTy(DAG.getDataLayout());
-      SDValue Mask = DAG.getNode(X86ISD::VINSERT, dl, MaskVT,
-                                 getZeroVector(MaskVT, Subtarget, DAG, dl), Idx,
-                                 DAG.getConstant(0, dl, PtrVT));
-      SDValue Perm = DAG.getNode(X86ISD::VPERMV, dl, VecVT, Mask, Vec);
-      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Perm,
-                         DAG.getConstant(0, dl, PtrVT));
-    }
     return SDValue();
   }
@@ -13410,7 +13995,7 @@ X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const {

   if(Vec.isUndef()) {
     if (IdxVal)
-      EltInVec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, EltInVec,
+      EltInVec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, EltInVec,
                              DAG.getConstant(IdxVal, dl, MVT::i8));
     return EltInVec;
   }
@@ -13420,21 +14005,21 @@ X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const {
   if (IdxVal == 0 ) { // EltInVec already at correct index and other bits are 0.
     // Clean the first bit in source vector.
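The KSHIFTL/KSHIFTR pair above isolates a single bit of a mask register. The same trick on a 16-bit scalar, standing in for a v16i1 k-register:

#include <cassert>
#include <cstdint>

static unsigned extractMaskBit(uint16_t Bits, unsigned Idx) {
  const unsigned MaxShift = 15;                         // NumElts - 1.
  uint16_t Tmp = (uint16_t)(Bits << (MaxShift - Idx));  // Wanted bit -> MSB.
  return Tmp >> MaxShift;                               // MSB -> bit 0.
}

int main() {
  assert(extractMaskBit(0x0024, 2) == 1);  // Bits 2 and 5 are set in 0x0024.
  assert(extractMaskBit(0x0024, 3) == 0);
  return 0;
}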
- Vec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, Vec, + Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec, DAG.getConstant(1 , dl, MVT::i8)); - Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec, + Vec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, Vec, DAG.getConstant(1, dl, MVT::i8)); return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec); } if (IdxVal == NumElems -1) { // Move the bit to the last position inside the vector. - EltInVec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, EltInVec, + EltInVec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, EltInVec, DAG.getConstant(IdxVal, dl, MVT::i8)); // Clean the last bit in the source vector. - Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec, + Vec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, Vec, DAG.getConstant(1, dl, MVT::i8)); - Vec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, Vec, + Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec, DAG.getConstant(1 , dl, MVT::i8)); return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec); @@ -13466,17 +14051,21 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, auto *N2C = cast(N2); unsigned IdxVal = N2C->getZExtValue(); - // If we are clearing out a element, we do this more efficiently with a - // blend shuffle than a costly integer insertion. - // TODO: would other rematerializable values (e.g. allbits) benefit as well? + bool IsZeroElt = X86::isZeroNode(N1); + bool IsAllOnesElt = VT.isInteger() && llvm::isAllOnesConstant(N1); + + // If we are inserting a element, see if we can do this more efficiently with + // a blend shuffle with a rematerializable vector than a costly integer + // insertion. // TODO: pre-SSE41 targets will tend to use bit masking - this could still // be beneficial if we are inserting several zeros and can combine the masks. - if (X86::isZeroNode(N1) && Subtarget.hasSSE41() && NumElts <= 8) { - SmallVector ClearMask; + if ((IsZeroElt || IsAllOnesElt) && Subtarget.hasSSE41() && NumElts <= 8) { + SmallVector BlendMask; for (unsigned i = 0; i != NumElts; ++i) - ClearMask.push_back(i == IdxVal ? i + NumElts : i); - SDValue ZeroVector = getZeroVector(VT, Subtarget, DAG, dl); - return DAG.getVectorShuffle(VT, dl, N0, ZeroVector, ClearMask); + BlendMask.push_back(i == IdxVal ? i + NumElts : i); + SDValue CstVector = IsZeroElt ? getZeroVector(VT, Subtarget, DAG, dl) + : DAG.getConstant(-1, dl, VT); + return DAG.getVectorShuffle(VT, dl, N0, CstVector, BlendMask); } // If the vector is wider than 128 bits, extract the 128-bit subvector, insert @@ -13513,25 +14102,27 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, } assert(VT.is128BitVector() && "Only 128-bit vector types should be left!"); - if (Subtarget.hasSSE41()) { - if (EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) { - unsigned Opc; - if (VT == MVT::v8i16) { - Opc = X86ISD::PINSRW; - } else { - assert(VT == MVT::v16i8); - Opc = X86ISD::PINSRB; - } - - // Transform it so it match pinsr{b,w} which expects a GR32 as its second - // argument. - if (N1.getValueType() != MVT::i32) - N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1); - if (N2.getValueType() != MVT::i32) - N2 = DAG.getIntPtrConstant(IdxVal, dl); - return DAG.getNode(Opc, dl, VT, N0, N1, N2); + // Transform it so it match pinsr{b,w} which expects a GR32 as its second + // argument. SSE41 required for pinsrb. 
+ if (VT == MVT::v8i16 || (VT == MVT::v16i8 && Subtarget.hasSSE41())) { + unsigned Opc; + if (VT == MVT::v8i16) { + assert(Subtarget.hasSSE2() && "SSE2 required for PINSRW"); + Opc = X86ISD::PINSRW; + } else { + assert(VT == MVT::v16i8 && "PINSRB requires v16i8 vector"); + assert(Subtarget.hasSSE41() && "SSE41 required for PINSRB"); + Opc = X86ISD::PINSRB; } + if (N1.getValueType() != MVT::i32) + N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1); + if (N2.getValueType() != MVT::i32) + N2 = DAG.getIntPtrConstant(IdxVal, dl); + return DAG.getNode(Opc, dl, VT, N0, N1, N2); + } + + if (Subtarget.hasSSE41()) { if (EltVT == MVT::f32) { // Bits [7:6] of the constant are the source select. This will always be // zero here. The DAG Combiner may combine an extract_elt index into @@ -13561,36 +14152,29 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2); } - if (EltVT == MVT::i32 || EltVT == MVT::i64) { - // PINSR* works with constant index. + // PINSR* works with constant index. + if (EltVT == MVT::i32 || EltVT == MVT::i64) return Op; - } } - if (EltVT == MVT::i8) - return SDValue(); - - if (EltVT.getSizeInBits() == 16) { - // Transform it so it match pinsrw which expects a 16-bit value in a GR32 - // as its second argument. - if (N1.getValueType() != MVT::i32) - N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1); - if (N2.getValueType() != MVT::i32) - N2 = DAG.getIntPtrConstant(IdxVal, dl); - return DAG.getNode(X86ISD::PINSRW, dl, VT, N0, N1, N2); - } return SDValue(); } -static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { +static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, const X86Subtarget &Subtarget, + SelectionDAG &DAG) { SDLoc dl(Op); MVT OpVT = Op.getSimpleValueType(); + // It's always cheaper to replace a xor+movd with xorps and simplifies further + // combines. + if (X86::isZeroNode(Op.getOperand(0))) + return getZeroVector(OpVT, Subtarget, DAG, dl); + // If this is a 256-bit vector result, first insert into a 128-bit // vector and then insert into the 256-bit vector. if (!OpVT.is128BitVector()) { // Insert into a 128-bit vector. - unsigned SizeFactor = OpVT.getSizeInBits()/128; + unsigned SizeFactor = OpVT.getSizeInBits() / 128; MVT VT128 = MVT::getVectorVT(OpVT.getVectorElementType(), OpVT.getVectorNumElements() / SizeFactor); @@ -13599,9 +14183,13 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { // Insert the 128-bit vector. return insert128BitVector(DAG.getUNDEF(OpVT), Op, 0, DAG, dl); } + assert(OpVT.is128BitVector() && "Expected an SSE type!"); + + // Pass through a v4i32 SCALAR_TO_VECTOR as that's what we use in tblgen. + if (OpVT == MVT::v4i32) + return Op; SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0)); - assert(OpVT.is128BitVector() && "Expected an SSE type!"); return DAG.getBitcast( OpVT, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, AnyExt)); } @@ -13623,20 +14211,14 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget, In.getSimpleValueType().is512BitVector()) && "Can only extract from 256-bit or 512-bit vectors"); - if (ResVT.is128BitVector()) - return extract128BitVector(In, IdxVal, DAG, dl); - if (ResVT.is256BitVector()) - return extract256BitVector(In, IdxVal, DAG, dl); - - llvm_unreachable("Unimplemented!"); -} + // If the input is a buildvector just emit a smaller one. 
+ unsigned ElemsPerChunk = ResVT.getVectorNumElements(); + if (In.getOpcode() == ISD::BUILD_VECTOR) + return DAG.getNode(ISD::BUILD_VECTOR, dl, ResVT, + makeArrayRef(In->op_begin() + IdxVal, ElemsPerChunk)); -static bool areOnlyUsersOf(SDNode *N, ArrayRef ValidUsers) { - for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) - if (llvm::all_of(ValidUsers, - [&I](SDValue V) { return V.getNode() != *I; })) - return false; - return true; + // Everything else is legal. + return Op; } // Lower a node with an INSERT_SUBVECTOR opcode. This may result in a @@ -13644,84 +14226,10 @@ static bool areOnlyUsersOf(SDNode *N, ArrayRef ValidUsers) { // the upper bits of a vector. static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { - assert(Subtarget.hasAVX() && "INSERT_SUBVECTOR requires AVX"); - - SDLoc dl(Op); - SDValue Vec = Op.getOperand(0); - SDValue SubVec = Op.getOperand(1); - SDValue Idx = Op.getOperand(2); - - unsigned IdxVal = cast(Idx)->getZExtValue(); - MVT OpVT = Op.getSimpleValueType(); - MVT SubVecVT = SubVec.getSimpleValueType(); + assert(Op.getSimpleValueType().getVectorElementType() == MVT::i1); - if (OpVT.getVectorElementType() == MVT::i1) - return insert1BitVector(Op, DAG, Subtarget); - - assert((OpVT.is256BitVector() || OpVT.is512BitVector()) && - "Can only insert into 256-bit or 512-bit vectors"); - - // Fold two 16-byte or 32-byte subvector loads into one 32-byte or 64-byte - // load: - // (insert_subvector (insert_subvector undef, (load16 addr), 0), - // (load16 addr + 16), Elts/2) - // --> load32 addr - // or: - // (insert_subvector (insert_subvector undef, (load32 addr), 0), - // (load32 addr + 32), Elts/2) - // --> load64 addr - // or a 16-byte or 32-byte broadcast: - // (insert_subvector (insert_subvector undef, (load16 addr), 0), - // (load16 addr), Elts/2) - // --> X86SubVBroadcast(load16 addr) - // or: - // (insert_subvector (insert_subvector undef, (load32 addr), 0), - // (load32 addr), Elts/2) - // --> X86SubVBroadcast(load32 addr) - if ((IdxVal == OpVT.getVectorNumElements() / 2) && - Vec.getOpcode() == ISD::INSERT_SUBVECTOR && - OpVT.getSizeInBits() == SubVecVT.getSizeInBits() * 2) { - auto *Idx2 = dyn_cast(Vec.getOperand(2)); - if (Idx2 && Idx2->getZExtValue() == 0) { - SDValue SubVec2 = Vec.getOperand(1); - // If needed, look through bitcasts to get to the load. - if (auto *FirstLd = dyn_cast(peekThroughBitcasts(SubVec2))) { - bool Fast; - unsigned Alignment = FirstLd->getAlignment(); - unsigned AS = FirstLd->getAddressSpace(); - const X86TargetLowering *TLI = Subtarget.getTargetLowering(); - if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), - OpVT, AS, Alignment, &Fast) && Fast) { - SDValue Ops[] = {SubVec2, SubVec}; - if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false)) - return Ld; - } - } - // If lower/upper loads are the same and the only users of the load, then - // lower to a VBROADCASTF128/VBROADCASTI128/etc. - if (auto *Ld = dyn_cast(peekThroughOneUseBitcasts(SubVec2))) { - if (SubVec2 == SubVec && ISD::isNormalLoad(Ld) && - areOnlyUsersOf(SubVec2.getNode(), {Op, Vec})) { - return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, SubVec); - } - } - // If this is subv_broadcast insert into both halves, use a larger - // subv_broadcast. 
- if (SubVec.getOpcode() == X86ISD::SUBV_BROADCAST && SubVec == SubVec2) { - return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, - SubVec.getOperand(0)); - } - } - } - - if (SubVecVT.is128BitVector()) - return insert128BitVector(Vec, SubVec, IdxVal, DAG, dl); - - if (SubVecVT.is256BitVector()) - return insert256BitVector(Vec, SubVec, IdxVal, DAG, dl); - - llvm_unreachable("Unimplemented!"); -} + return insert1BitVector(Op, DAG, Subtarget); +} // Returns the appropriate wrapper opcode for a global reference. unsigned X86TargetLowering::getGlobalWrapperKind(const GlobalValue *GV) const { @@ -14971,13 +15479,13 @@ static SDValue LowerZERO_EXTEND_AVX512(SDValue Op, MVT InVT = In.getSimpleValueType(); SDLoc DL(Op); unsigned NumElts = VT.getVectorNumElements(); - if (NumElts != 8 && NumElts != 16 && !Subtarget.hasBWI()) - return SDValue(); - if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1) + if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1 && + (NumElts == 8 || NumElts == 16 || Subtarget.hasBWI())) return DAG.getNode(X86ISD::VZEXT, DL, VT, In); - assert(InVT.getVectorElementType() == MVT::i1); + if (InVT.getVectorElementType() != MVT::i1) + return SDValue(); // Extend VT if the target is 256 or 128bit vector and VLX is not supported. MVT ExtVT = VT; @@ -15165,7 +15673,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { // word to byte only under BWI if (InVT == MVT::v16i16 && !Subtarget.hasBWI()) // v16i16 -> v16i8 return DAG.getNode(X86ISD::VTRUNC, DL, VT, - DAG.getNode(X86ISD::VSEXT, DL, MVT::v16i32, In)); + getExtendInVec(X86ISD::VSEXT, DL, MVT::v16i32, In, DAG)); return DAG.getNode(X86ISD::VTRUNC, DL, VT, In); } @@ -15189,8 +15697,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { if (Subtarget.hasInt256()) { static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1}; In = DAG.getBitcast(MVT::v8i32, In); - In = DAG.getVectorShuffle(MVT::v8i32, DL, In, DAG.getUNDEF(MVT::v8i32), - ShufMask); + In = DAG.getVectorShuffle(MVT::v8i32, DL, In, In, ShufMask); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, In, DAG.getIntPtrConstant(0, DL)); } @@ -15210,26 +15717,16 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { if (Subtarget.hasInt256()) { In = DAG.getBitcast(MVT::v32i8, In); - SmallVector pshufbMask; - for (unsigned i = 0; i < 2; ++i) { - pshufbMask.push_back(DAG.getConstant(0x0, DL, MVT::i8)); - pshufbMask.push_back(DAG.getConstant(0x1, DL, MVT::i8)); - pshufbMask.push_back(DAG.getConstant(0x4, DL, MVT::i8)); - pshufbMask.push_back(DAG.getConstant(0x5, DL, MVT::i8)); - pshufbMask.push_back(DAG.getConstant(0x8, DL, MVT::i8)); - pshufbMask.push_back(DAG.getConstant(0x9, DL, MVT::i8)); - pshufbMask.push_back(DAG.getConstant(0xc, DL, MVT::i8)); - pshufbMask.push_back(DAG.getConstant(0xd, DL, MVT::i8)); - for (unsigned j = 0; j < 8; ++j) - pshufbMask.push_back(DAG.getConstant(0x80, DL, MVT::i8)); - } - SDValue BV = DAG.getBuildVector(MVT::v32i8, DL, pshufbMask); - In = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v32i8, In, BV); + // The PSHUFB mask: + static const int ShufMask1[] = { 0, 1, 4, 5, 8, 9, 12, 13, + -1, -1, -1, -1, -1, -1, -1, -1, + 16, 17, 20, 21, 24, 25, 28, 29, + -1, -1, -1, -1, -1, -1, -1, -1 }; + In = DAG.getVectorShuffle(MVT::v32i8, DL, In, In, ShufMask1); In = DAG.getBitcast(MVT::v4i64, In); - static const int ShufMask[] = {0, 2, -1, -1}; - In = DAG.getVectorShuffle(MVT::v4i64, DL, In, DAG.getUNDEF(MVT::v4i64), - ShufMask); + static const int 
ShufMask2[] = {0, 2, -1, -1}; + In = DAG.getVectorShuffle(MVT::v4i64, DL, In, In, ShufMask2); In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In, DAG.getIntPtrConstant(0, DL)); return DAG.getBitcast(VT, In); @@ -15248,9 +15745,8 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1}; - SDValue Undef = DAG.getUNDEF(MVT::v16i8); - OpLo = DAG.getVectorShuffle(MVT::v16i8, DL, OpLo, Undef, ShufMask1); - OpHi = DAG.getVectorShuffle(MVT::v16i8, DL, OpHi, Undef, ShufMask1); + OpLo = DAG.getVectorShuffle(MVT::v16i8, DL, OpLo, OpLo, ShufMask1); + OpHi = DAG.getVectorShuffle(MVT::v16i8, DL, OpHi, OpHi, ShufMask1); OpLo = DAG.getBitcast(MVT::v4i32, OpLo); OpHi = DAG.getBitcast(MVT::v4i32, OpHi); @@ -15274,8 +15770,8 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { // Prepare truncation shuffle mask for (unsigned i = 0; i != NumElems; ++i) MaskVec[i] = i * 2; - SDValue V = DAG.getVectorShuffle(NVT, DL, DAG.getBitcast(NVT, In), - DAG.getUNDEF(NVT), MaskVec); + In = DAG.getBitcast(NVT, In); + SDValue V = DAG.getVectorShuffle(NVT, DL, In, In, MaskVec); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, DAG.getIntPtrConstant(0, DL)); } @@ -15567,7 +16063,7 @@ static SDValue LowerVectorAllZeroTest(SDValue Op, const X86Subtarget &Subtarget, for (unsigned i = 0, e = VecIns.size(); i < e; ++i) VecIns[i] = DAG.getBitcast(TestVT, VecIns[i]); - // If more than one full vectors are evaluated, OR them first before PTEST. + // If more than one full vector is evaluated, OR them first before PTEST. for (unsigned Slot = 0, e = VecIns.size(); e - Slot > 1; Slot += 2, e += 1) { // Each iteration will OR 2 nodes and append the result until there is only // 1 node left, i.e. the final OR'd value of all vectors. @@ -15576,8 +16072,7 @@ static SDValue LowerVectorAllZeroTest(SDValue Op, const X86Subtarget &Subtarget, VecIns.push_back(DAG.getNode(ISD::OR, DL, TestVT, LHS, RHS)); } - return DAG.getNode(X86ISD::PTEST, DL, MVT::i32, - VecIns.back(), VecIns.back()); + return DAG.getNode(X86ISD::PTEST, DL, MVT::i32, VecIns.back(), VecIns.back()); } /// \brief return true if \c Op has a use that doesn't just read flags. @@ -15694,6 +16189,12 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl, } } + // Sometimes flags can be set either with an AND or with an SRL/SHL + // instruction. SRL/SHL variant should be preferred for masks longer than this + // number of bits. + const int ShiftToAndMaxMaskWidth = 32; + const bool ZeroCheck = (X86CC == X86::COND_E || X86CC == X86::COND_NE); + // NOTICE: In the code below we use ArithOp to hold the arithmetic operation // which may be the result of a CAST. We use the variable 'Op', which is the // non-casted variable when we check for possible users. @@ -15742,7 +16243,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl, // If we have a constant logical shift that's only used in a comparison // against zero turn it into an equivalent AND. This allows turning it into // a TEST instruction later. - if ((X86CC == X86::COND_E || X86CC == X86::COND_NE) && Op->hasOneUse() && + if (ZeroCheck && Op->hasOneUse() && isa(Op->getOperand(1)) && !hasNonFlagsUse(Op)) { EVT VT = Op.getValueType(); unsigned BitWidth = VT.getSizeInBits(); @@ -15752,7 +16253,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl, APInt Mask = ArithOp.getOpcode() == ISD::SRL ? 
APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt) : APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt); - if (!Mask.isSignedIntN(32)) // Avoid large immediates. + if (!Mask.isSignedIntN(ShiftToAndMaxMaskWidth)) break; Op = DAG.getNode(ISD::AND, dl, VT, Op->getOperand(0), DAG.getConstant(Mask, dl, VT)); @@ -15761,18 +16262,59 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl, case ISD::AND: // If the primary 'and' result isn't used, don't bother using X86ISD::AND, - // because a TEST instruction will be better. + // because a TEST instruction will be better. However, AND should be + // preferred if the instruction can be combined into ANDN. if (!hasNonFlagsUse(Op)) { SDValue Op0 = ArithOp->getOperand(0); SDValue Op1 = ArithOp->getOperand(1); EVT VT = ArithOp.getValueType(); bool isAndn = isBitwiseNot(Op0) || isBitwiseNot(Op1); bool isLegalAndnType = VT == MVT::i32 || VT == MVT::i64; + bool isProperAndn = isAndn && isLegalAndnType && Subtarget.hasBMI(); + + // If we cannot select an ANDN instruction, check if we can replace + // AND+IMM64 with a shift before giving up. This is possible for masks + // like 0xFF000000 or 0x00FFFFFF and if we care only about the zero flag. + if (!isProperAndn) { + if (!ZeroCheck) + break; + + assert(!isa(Op0) && "AND node isn't canonicalized"); + auto *CN = dyn_cast(Op1); + if (!CN) + break; + + const APInt &Mask = CN->getAPIntValue(); + if (Mask.isSignedIntN(ShiftToAndMaxMaskWidth)) + break; // Prefer TEST instruction. + + unsigned BitWidth = Mask.getBitWidth(); + unsigned LeadingOnes = Mask.countLeadingOnes(); + unsigned TrailingZeros = Mask.countTrailingZeros(); + + if (LeadingOnes + TrailingZeros == BitWidth) { + assert(TrailingZeros < VT.getSizeInBits() && + "Shift amount should be less than the type width"); + MVT ShTy = getScalarShiftAmountTy(DAG.getDataLayout(), VT); + SDValue ShAmt = DAG.getConstant(TrailingZeros, dl, ShTy); + Op = DAG.getNode(ISD::SRL, dl, VT, Op0, ShAmt); + break; + } + + unsigned LeadingZeros = Mask.countLeadingZeros(); + unsigned TrailingOnes = Mask.countTrailingOnes(); + + if (LeadingZeros + TrailingOnes == BitWidth) { + assert(LeadingZeros < VT.getSizeInBits() && + "Shift amount should be less than the type width"); + MVT ShTy = getScalarShiftAmountTy(DAG.getDataLayout(), VT); + SDValue ShAmt = DAG.getConstant(LeadingZeros, dl, ShTy); + Op = DAG.getNode(ISD::SHL, dl, VT, Op0, ShAmt); + break; + } - // But if we can combine this into an ANDN operation, then create an AND - // now and allow it to be pattern matched into an ANDN. - if (!Subtarget.hasBMI() || !isAndn || !isLegalAndnType) break; + } } LLVM_FALLTHROUGH; case ISD::SUB: @@ -15792,7 +16334,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl, case ISD::XOR: Opcode = X86ISD::XOR; break; case ISD::AND: Opcode = X86ISD::AND; break; case ISD::OR: { - if (!NeedTruncation && (X86CC == X86::COND_E || X86CC == X86::COND_NE)) { + if (!NeedTruncation && ZeroCheck) { if (SDValue EFLAGS = LowerVectorAllZeroTest(Op, Subtarget, DAG)) return EFLAGS; } @@ -16856,20 +17398,28 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { } if (Cond.getOpcode() == ISD::SETCC) { - if (SDValue NewCond = LowerSETCC(Cond, DAG)) + if (SDValue NewCond = LowerSETCC(Cond, DAG)) { Cond = NewCond; + // If the condition was updated, it's possible that the operands of the + // select were also updated (for example, EmitTest has a RAUW). Refresh + // the local references to the select operands in case they got stale. 
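
// Before the LowerSELECT code resumes below: a standalone check (hypothetical
// code, not part of this patch or of LLVM) of the EmitTest rewrite above,
// which replaces a TEST against a wide contiguous AND mask by a single
// SRL/SHL, valid whenever only the zero flag is consumed. Masks are examples.
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t HiMask = 0xFFFFFFFF00000000ULL; // leading ones + trailing zeros
  const uint64_t LoMask = 0x000000FFFFFFFFFFULL; // leading zeros + trailing ones
  for (uint64_t x : {0ULL, 1ULL, 0x80000000ULL, 0xDEADBEEF12345678ULL,
                     0xFFFFFF0000000000ULL}) {
    // countTrailingZeros(HiMask) == 32: SRL by 32 keeps exactly the masked bits.
    assert(((x & HiMask) == 0) == ((x >> 32) == 0));
    // countLeadingZeros(LoMask) == 24: SHL by 24 keeps exactly the masked bits.
    assert(((x & LoMask) == 0) == ((x << 24) == 0));
  }
  return 0;
}
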
+        Op1 = Op.getOperand(1);
+        Op2 = Op.getOperand(2);
+      }
   }
 
   // (select (x == 0), -1,  y) -> (sign_bit (x - 1)) | y
   // (select (x == 0),  y, -1) -> ~(sign_bit (x - 1)) | y
   // (select (x != 0),  y, -1) -> (sign_bit (x - 1)) | y
   // (select (x != 0), -1,  y) -> ~(sign_bit (x - 1)) | y
+  // (select (and (x , 0x1) == 0), y, (z ^ y) ) -> (-(and (x , 0x1)) & z ) ^ y
+  // (select (and (x , 0x1) == 0), y, (z | y) ) -> (-(and (x , 0x1)) & z ) | y
   if (Cond.getOpcode() == X86ISD::SETCC &&
       Cond.getOperand(1).getOpcode() == X86ISD::CMP &&
       isNullConstant(Cond.getOperand(1).getOperand(1))) {
     SDValue Cmp = Cond.getOperand(1);
-
-    unsigned CondCode =cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue();
+    unsigned CondCode =
+        cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue();
 
     if ((isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) &&
         (CondCode == X86::COND_E || CondCode == X86::COND_NE)) {
@@ -16906,6 +17456,43 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
       if (!isNullConstant(Op2))
         Res = DAG.getNode(ISD::OR, DL, Res.getValueType(), Res, Y);
       return Res;
+    } else if (!Subtarget.hasCMov() && CondCode == X86::COND_E &&
+               Cmp.getOperand(0).getOpcode() == ISD::AND &&
+               isOneConstant(Cmp.getOperand(0).getOperand(1))) {
+      SDValue CmpOp0 = Cmp.getOperand(0);
+      SDValue Src1, Src2;
+      // Returns true if Op2 is an XOR or OR operator and one of its operands
+      // equals Op1, i.e. the pattern is (a, a op b) or (b, a op b).
+      auto isOrXorPattern = [&]() {
+        if ((Op2.getOpcode() == ISD::XOR || Op2.getOpcode() == ISD::OR) &&
+            (Op2.getOperand(0) == Op1 || Op2.getOperand(1) == Op1)) {
+          Src1 =
+              Op2.getOperand(0) == Op1 ? Op2.getOperand(1) : Op2.getOperand(0);
+          Src2 = Op1;
+          return true;
+        }
+        return false;
+      };
+
+      if (isOrXorPattern()) {
+        SDValue Neg;
+        unsigned int CmpSz = CmpOp0.getSimpleValueType().getSizeInBits();
+        // We need a mask of all zeros or all ones, with the same size as the
+        // other operands.
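
// The mask trick described in the comment above, as a standalone check
// (hypothetical code, not part of this patch or of LLVM): on targets without
// CMOV, (select ((x & 1) == 0), y, (z op y)) becomes ((-(x & 1)) & z) op y,
// because -(x & 1) is either all-zeros or all-ones.
#include <cassert>
#include <cstdint>

static uint32_t selectXor(uint32_t x, uint32_t y, uint32_t z) {
  uint32_t Neg = 0u - (x & 1u);  // all-zeros or all-ones mask
  return (Neg & z) ^ y;          // == ((x & 1) == 0) ? y : (z ^ y)
}

int main() {
  for (uint32_t x : {0u, 1u, 2u, 3u}) {
    uint32_t y = 0x1234u, z = 0xBEEFu;
    assert(selectXor(x, y, z) == (((x & 1u) == 0) ? y : (z ^ y)));
  }
  return 0;
}
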
+        if (CmpSz > VT.getSizeInBits())
+          Neg = DAG.getNode(ISD::TRUNCATE, DL, VT, CmpOp0);
+        else if (CmpSz < VT.getSizeInBits())
+          Neg = DAG.getNode(ISD::AND, DL, VT,
+              DAG.getNode(ISD::ANY_EXTEND, DL, VT, CmpOp0.getOperand(0)),
+              DAG.getConstant(1, DL, VT));
+        else
+          Neg = CmpOp0;
+        SDValue Mask = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+                                   Neg); // -(and (x, 0x1))
+        SDValue And = DAG.getNode(ISD::AND, DL, VT, Mask, Src1); // Mask & z
+        return DAG.getNode(Op2.getOpcode(), DL, VT, And, Src2); // (Mask & z) op y
+      }
     }
   }
 
@@ -17061,25 +17648,32 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op,
 
   unsigned NumElts = VT.getVectorNumElements();
 
-  if (NumElts != 8 && NumElts != 16 && !Subtarget.hasBWI())
-    return SDValue();
-
-  if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1) {
+  if (VT.is512BitVector() && InVTElt != MVT::i1 &&
+      (NumElts == 8 || NumElts == 16 || Subtarget.hasBWI())) {
     if (In.getOpcode() == X86ISD::VSEXT || In.getOpcode() == X86ISD::VZEXT)
-      return DAG.getNode(In.getOpcode(), dl, VT, In.getOperand(0));
-    return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
+      return getExtendInVec(In.getOpcode(), dl, VT, In.getOperand(0), DAG);
+    return getExtendInVec(X86ISD::VSEXT, dl, VT, In, DAG);
   }
 
-  assert (InVT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
-  MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts);
-  SDValue NegOne = DAG.getConstant(
-      APInt::getAllOnesValue(ExtVT.getScalarSizeInBits()), dl, ExtVT);
-  SDValue Zero = DAG.getConstant(
-      APInt::getNullValue(ExtVT.getScalarSizeInBits()), dl, ExtVT);
+  if (InVTElt != MVT::i1)
+    return SDValue();
+
+  MVT ExtVT = VT;
+  if (!VT.is512BitVector() && !Subtarget.hasVLX())
+    ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts);
+
+  SDValue V;
+  if (Subtarget.hasDQI()) {
+    V = getExtendInVec(X86ISD::VSEXT, dl, ExtVT, In, DAG);
+    assert(!VT.is512BitVector() && "Unexpected vector type");
+  } else {
+    SDValue NegOne = getOnesVector(ExtVT, Subtarget, DAG, dl);
+    SDValue Zero = getZeroVector(ExtVT, Subtarget, DAG, dl);
+    V = DAG.getNode(ISD::VSELECT, dl, ExtVT, In, NegOne, Zero);
+    if (ExtVT == VT)
+      return V;
+  }
 
-  SDValue V = DAG.getNode(ISD::VSELECT, dl, ExtVT, In, NegOne, Zero);
-  if (VT.is512BitVector())
-    return V;
   return DAG.getNode(X86ISD::VTRUNC, dl, VT, V);
 }
 
@@ -17122,11 +17716,15 @@ static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op,
   assert((Op.getOpcode() != ISD::ZERO_EXTEND_VECTOR_INREG ||
           InVT == MVT::v64i8) && "Zero extend only for v64i8 input!");
 
-  // SSE41 targets can use the pmovsx* instructions directly.
-  unsigned ExtOpc = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ?
-                    X86ISD::VSEXT : X86ISD::VZEXT;
-  if (Subtarget.hasSSE41())
+  // SSE41 targets can use the pmovsx* instructions directly for 128-bit
+  // results, so they are legal and shouldn't occur here. AVX2/AVX512 pmovsx*
+  // instructions still need to be handled here for 256/512-bit results.
+  if (Subtarget.hasInt256()) {
+    assert(VT.getSizeInBits() > 128 && "Unexpected 128-bit vector extension");
+    unsigned ExtOpc = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ?
+                      X86ISD::VSEXT : X86ISD::VZEXT;
     return DAG.getNode(ExtOpc, dl, VT, In);
+  }
 
   // We should only get here for sign extend.
assert(Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG && @@ -17211,8 +17809,8 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget, MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), VT.getVectorNumElements() / 2); - OpLo = DAG.getNode(X86ISD::VSEXT, dl, HalfVT, OpLo); - OpHi = DAG.getNode(X86ISD::VSEXT, dl, HalfVT, OpHi); + OpLo = DAG.getSignExtendVectorInReg(OpLo, dl, HalfVT); + OpHi = DAG.getSignExtendVectorInReg(OpHi, dl, HalfVT); return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); } @@ -17527,7 +18125,7 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget &Subtarget, if (Ext == ISD::SEXTLOAD) { // If we have SSE4.1, we can directly emit a VSEXT node. if (Subtarget.hasSSE41()) { - SDValue Sext = DAG.getNode(X86ISD::VSEXT, dl, RegVT, SlicedVec); + SDValue Sext = getExtendInVec(X86ISD::VSEXT, dl, RegVT, SlicedVec, DAG); DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF); return Sext; } @@ -18085,6 +18683,11 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT, SelectionDAG &DAG) { MVT ElementType = VT.getVectorElementType(); + // Bitcast the source vector to the output type, this is mainly necessary for + // vXi8/vXi64 shifts. + if (VT != SrcOp.getSimpleValueType()) + SrcOp = DAG.getBitcast(VT, SrcOp); + // Fold this packed shift into its first operand if ShiftAmt is 0. if (ShiftAmt == 0) return SrcOp; @@ -18101,9 +18704,8 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT, && "Unknown target vector shift-by-constant node"); // Fold this packed vector shift into a build vector if SrcOp is a - // vector of Constants or UNDEFs, and SrcOp valuetype is the same as VT. - if (VT == SrcOp.getSimpleValueType() && - ISD::isBuildVectorOfConstantSDNodes(SrcOp.getNode())) { + // vector of Constants or UNDEFs. + if (ISD::isBuildVectorOfConstantSDNodes(SrcOp.getNode())) { SmallVector Elts; unsigned NumElts = SrcOp->getNumOperands(); ConstantSDNode *ND; @@ -18159,6 +18761,7 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT, /// constant. Takes immediate version of shift as input. static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT, SDValue SrcOp, SDValue ShAmt, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT SVT = ShAmt.getSimpleValueType(); assert((SVT == MVT::i32 || SVT == MVT::i64) && "Unexpected value type!"); @@ -18176,27 +18779,32 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT, case X86ISD::VSRAI: Opc = X86ISD::VSRA; break; } - const X86Subtarget &Subtarget = - static_cast(DAG.getSubtarget()); - if (Subtarget.hasSSE41() && ShAmt.getOpcode() == ISD::ZERO_EXTEND && - ShAmt.getOperand(0).getSimpleValueType() == MVT::i16) { - // Let the shuffle legalizer expand this shift amount node. - SDValue Op0 = ShAmt.getOperand(0); - Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(Op0), MVT::v8i16, Op0); - ShAmt = getShuffleVectorZeroOrUndef(Op0, 0, true, Subtarget, DAG); + // Need to build a vector containing shift amount. + // SSE/AVX packed shifts only use the lower 64-bit of the shift count. + // +=================+============+=======================================+ + // | ShAmt is | HasSSE4.1? 
| Construct ShAmt vector as | + // +=================+============+=======================================+ + // | i64 | Yes, No | Use ShAmt as lowest elt | + // | i32 | Yes | zero-extend in-reg | + // | (i32 zext(i16)) | Yes | zero-extend in-reg | + // | i16/i32 | No | v4i32 build_vector(ShAmt, 0, ud, ud)) | + // +=================+============+=======================================+ + + if (SVT == MVT::i64) + ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v2i64, ShAmt); + else if (Subtarget.hasSSE41() && ShAmt.getOpcode() == ISD::ZERO_EXTEND && + ShAmt.getOperand(0).getSimpleValueType() == MVT::i16) { + ShAmt = ShAmt.getOperand(0); + ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v8i16, ShAmt); + ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64); + } else if (Subtarget.hasSSE41() && + ShAmt.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v4i32, ShAmt); + ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64); } else { - // Need to build a vector containing shift amount. - // SSE/AVX packed shifts only use the lower 64-bit of the shift count. - SmallVector ShOps; - ShOps.push_back(ShAmt); - if (SVT == MVT::i32) { - ShOps.push_back(DAG.getConstant(0, dl, SVT)); - ShOps.push_back(DAG.getUNDEF(SVT)); - } - ShOps.push_back(DAG.getUNDEF(SVT)); - - MVT BVT = SVT == MVT::i32 ? MVT::v4i32 : MVT::v2i64; - ShAmt = DAG.getBuildVector(BVT, dl, ShOps); + SmallVector ShOps = {ShAmt, DAG.getConstant(0, dl, SVT), + DAG.getUNDEF(SVT), DAG.getUNDEF(SVT)}; + ShAmt = DAG.getBuildVector(MVT::v4i32, dl, ShOps); } // The return type has to be a 128-bit type with the same element @@ -18394,6 +19002,15 @@ static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn, static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { + // Helper to detect if the operand is CUR_DIRECTION rounding mode. 
+ auto isRoundModeCurDirection = [](SDValue Rnd) { + if (!isa(Rnd)) + return false; + + unsigned Round = cast(Rnd)->getZExtValue(); + return Round == X86::STATIC_ROUNDING::CUR_DIRECTION; + }; + SDLoc dl(Op); unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); MVT VT = Op.getSimpleValueType(); @@ -18439,8 +19056,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; if (IntrWithRoundingModeOpcode != 0) { SDValue Rnd = Op.getOperand(4); - unsigned Round = cast(Rnd)->getZExtValue(); - if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION) { + if (!isRoundModeCurDirection(Rnd)) { return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(), Src, Rnd), @@ -18455,6 +19071,14 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget SDValue Src2 = Op.getOperand(2); SDValue passThru = Op.getOperand(3); SDValue Mask = Op.getOperand(4); + unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; + if (IntrWithRoundingModeOpcode != 0) { + SDValue Rnd = Op.getOperand(5); + if (!isRoundModeCurDirection(Rnd)) + return getScalarMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, + dl, VT, Src1, Src2, Rnd), + Mask, passThru, Subtarget, DAG); + } return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2), Mask, passThru, Subtarget, DAG); } @@ -18495,8 +19119,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; if (IntrWithRoundingModeOpcode != 0) { SDValue Rnd = Op.getOperand(5); - unsigned Round = cast(Rnd)->getZExtValue(); - if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION) { + if (!isRoundModeCurDirection(Rnd)) { return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(), Src1, Src2, Rnd), @@ -18557,8 +19180,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget Mask, PassThru, Subtarget, DAG); } case INTR_TYPE_3OP_IMM8_MASK: - case INTR_TYPE_3OP_MASK: - case INSERT_SUBVEC: { + case INTR_TYPE_3OP_MASK: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); SDValue Src3 = Op.getOperand(3); @@ -18567,13 +19189,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget if (IntrData->Type == INTR_TYPE_3OP_IMM8_MASK) Src3 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src3); - else if (IntrData->Type == INSERT_SUBVEC) { - // imm should be adapted to ISD::INSERT_SUBVECTOR behavior - assert(isa(Src3) && "Expected a ConstantSDNode here!"); - unsigned Imm = cast(Src3)->getZExtValue(); - Imm *= Src2.getSimpleValueType().getVectorNumElements(); - Src3 = DAG.getTargetConstant(Imm, dl, MVT::i32); - } // We specify 2 possible opcodes for intrinsics with rounding modes. 
// First, we check if the intrinsic may have non-default rounding mode, @@ -18581,8 +19196,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; if (IntrWithRoundingModeOpcode != 0) { SDValue Rnd = Op.getOperand(6); - unsigned Round = cast(Rnd)->getZExtValue(); - if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION) { + if (!isRoundModeCurDirection(Rnd)) { return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(), Src1, Src2, Src3, Rnd), @@ -18651,8 +19265,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; if (IntrWithRoundingModeOpcode != 0) { SDValue Rnd = Op.getOperand(5); - if (cast(Rnd)->getZExtValue() != - X86::STATIC_ROUNDING::CUR_DIRECTION) + if (!isRoundModeCurDirection(Rnd)) return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(), Src1, Src2, Src3, Rnd), @@ -18719,8 +19332,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; if (IntrWithRoundingModeOpcode != 0) { SDValue Rnd = Op.getOperand(4); - unsigned Round = cast(Rnd)->getZExtValue(); - if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION) { + if (!isRoundModeCurDirection(Rnd)) { return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(), Src, Rnd), @@ -18786,8 +19398,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget // (IntrData->Opc1 != 0), then we check the rounding mode operand. if (IntrData->Opc1 != 0) { SDValue Rnd = Op.getOperand(5); - if (cast(Rnd)->getZExtValue() != - X86::STATIC_ROUNDING::CUR_DIRECTION) + if (!isRoundModeCurDirection(Rnd)) Cmp = DAG.getNode(IntrData->Opc1, dl, MaskVT, Op.getOperand(1), Op.getOperand(2), CC, Rnd); } @@ -18819,8 +19430,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget SDValue Cmp; if (IntrData->Opc1 != 0) { SDValue Rnd = Op.getOperand(5); - if (cast(Rnd)->getZExtValue() != - X86::STATIC_ROUNDING::CUR_DIRECTION) + if (!isRoundModeCurDirection(Rnd)) Cmp = DAG.getNode(IntrData->Opc1, dl, MVT::i1, Src1, Src2, CC, Rnd); } //default rounding mode @@ -18879,8 +19489,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget SDValue Sae = Op.getOperand(4); SDValue FCmp; - if (cast(Sae)->getZExtValue() == - X86::STATIC_ROUNDING::CUR_DIRECTION) + if (isRoundModeCurDirection(Sae)) FCmp = DAG.getNode(X86ISD::FSETCCM, dl, MVT::i1, LHS, RHS, DAG.getConstant(CondVal, dl, MVT::i8)); else @@ -18891,7 +19500,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget } case VSHIFT: return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(), - Op.getOperand(1), Op.getOperand(2), DAG); + Op.getOperand(1), Op.getOperand(2), Subtarget, + DAG); case COMPRESS_EXPAND_IN_REG: { SDValue Mask = Op.getOperand(3); SDValue DataToCompress = Op.getOperand(1); @@ -19469,6 +20079,33 @@ static SDValue MarkEHGuard(SDValue Op, SelectionDAG &DAG) { return Chain; } +/// Emit Truncating Store with signed or unsigned saturation. 
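
// For reference while reading the truncating-store helpers below: a
// standalone sketch (hypothetical code, not part of this patch or of LLVM)
// of the signed vs. unsigned saturation that VTRUNCS/VTRUNCUS stores apply
// when narrowing, shown here for i32 -> i8.
#include <algorithm>
#include <cassert>
#include <cstdint>

static int8_t truncSStore(int32_t v) {   // signed saturation: clamp to [-128, 127]
  return (int8_t)std::min(127, std::max(-128, v));
}
static uint8_t truncUSStore(int32_t v) { // unsigned saturation: clamp to [0, 255]
  return (uint8_t)std::min(255, std::max(0, v));
}

int main() {
  assert(truncSStore(1000) == 127 && truncSStore(-1000) == -128);
  assert(truncSStore(5) == 5);
  assert(truncUSStore(1000) == 255 && truncUSStore(-7) == 0);
  assert(truncUSStore(5) == 5);
  return 0;
}
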
+static SDValue
+EmitTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl, SDValue Val,
+                SDValue Ptr, EVT MemVT, MachineMemOperand *MMO,
+                SelectionDAG &DAG) {
+
+  SDVTList VTs = DAG.getVTList(MVT::Other);
+  SDValue Undef = DAG.getUNDEF(Ptr.getValueType());
+  SDValue Ops[] = { Chain, Val, Ptr, Undef };
+  return SignedSat ?
+    DAG.getTargetMemSDNode<TruncSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO) :
+    DAG.getTargetMemSDNode<TruncUSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO);
+}
+
+/// Emit Masked Truncating Store with signed or unsigned saturation.
+static SDValue
+EmitMaskedTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl,
+                      SDValue Val, SDValue Ptr, SDValue Mask, EVT MemVT,
+                      MachineMemOperand *MMO, SelectionDAG &DAG) {
+
+  SDVTList VTs = DAG.getVTList(MVT::Other);
+  SDValue Ops[] = { Chain, Ptr, Mask, Val };
+  return SignedSat ?
+    DAG.getTargetMemSDNode<MaskedTruncSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO) :
+    DAG.getTargetMemSDNode<MaskedTruncUSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO);
+}
+
 static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
                                       SelectionDAG &DAG) {
   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
@@ -19627,18 +20264,39 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
     MemIntrinsicSDNode *MemIntr = dyn_cast<MemIntrinsicSDNode>(Op);
     assert(MemIntr && "Expected MemIntrinsicSDNode!");
 
-    EVT VT = MemIntr->getMemoryVT();
+    EVT MemVT = MemIntr->getMemoryVT();
 
-    if (isAllOnesConstant(Mask)) // return just a truncate store
-      return DAG.getTruncStore(Chain, dl, DataToTruncate, Addr, VT,
-                               MemIntr->getMemOperand());
+    uint16_t TruncationOp = IntrData->Opc0;
+    switch (TruncationOp) {
+    case X86ISD::VTRUNC: {
+      if (isAllOnesConstant(Mask)) // return just a truncate store
+        return DAG.getTruncStore(Chain, dl, DataToTruncate, Addr, MemVT,
+                                 MemIntr->getMemOperand());
 
-    MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
-    SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+      MVT MaskVT = MVT::getVectorVT(MVT::i1, MemVT.getVectorNumElements());
+      SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+
+      return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr, VMask, MemVT,
+                                MemIntr->getMemOperand(), true /* truncating */);
+    }
+    case X86ISD::VTRUNCUS:
+    case X86ISD::VTRUNCS: {
+      bool IsSigned = (TruncationOp == X86ISD::VTRUNCS);
+      if (isAllOnesConstant(Mask))
+        return EmitTruncSStore(IsSigned, Chain, dl, DataToTruncate, Addr, MemVT,
+                               MemIntr->getMemOperand(), DAG);
+
+      MVT MaskVT = MVT::getVectorVT(MVT::i1, MemVT.getVectorNumElements());
+      SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
 
-    return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr, VMask, VT,
-                              MemIntr->getMemOperand(), true /* truncating */);
+      return EmitMaskedTruncSStore(IsSigned, Chain, dl, DataToTruncate, Addr,
+                                   VMask, MemVT, MemIntr->getMemOperand(), DAG);
+    }
+    default:
+      llvm_unreachable("Unsupported truncstore intrinsic");
+    }
   }
+
   case EXPAND_FROM_MEM: {
     SDValue Mask = Op.getOperand(4);
     SDValue PassThru = Op.getOperand(3);
@@ -20370,19 +21028,10 @@ static SDValue Lower512IntArith(SDValue Op, SelectionDAG &DAG) {
                      DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2));
 }
 
-static SDValue LowerADD(SDValue Op, SelectionDAG &DAG) {
-  if (Op.getValueType() == MVT::i1)
-    return DAG.getNode(ISD::XOR, SDLoc(Op), Op.getValueType(),
-                       Op.getOperand(0), Op.getOperand(1));
-  assert(Op.getSimpleValueType().is256BitVector() &&
-         Op.getSimpleValueType().isInteger() &&
-         "Only handle AVX 256-bit vector integer operation");
-  return Lower256IntArith(Op, DAG);
-}
-
-static SDValue LowerSUB(SDValue Op, SelectionDAG &DAG)
{ - if (Op.getValueType() == MVT::i1) - return DAG.getNode(ISD::XOR, SDLoc(Op), Op.getValueType(), +static SDValue LowerADD_SUB(SDValue Op, SelectionDAG &DAG) { + MVT VT = Op.getSimpleValueType(); + if (VT.getScalarType() == MVT::i1) + return DAG.getNode(ISD::XOR, SDLoc(Op), VT, Op.getOperand(0), Op.getOperand(1)); assert(Op.getSimpleValueType().is256BitVector() && Op.getSimpleValueType().isInteger() && @@ -20402,7 +21051,7 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget, SDLoc dl(Op); MVT VT = Op.getSimpleValueType(); - if (VT == MVT::i1) + if (VT.getScalarType() == MVT::i1) return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), Op.getOperand(1)); // Decompose 256-bit ops into smaller 128-bit ops. @@ -20442,8 +21091,8 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget, // Extract the lo parts and sign extend to i16 SDValue ALo, BLo; if (Subtarget.hasSSE41()) { - ALo = DAG.getNode(X86ISD::VSEXT, dl, ExVT, A); - BLo = DAG.getNode(X86ISD::VSEXT, dl, ExVT, B); + ALo = DAG.getSignExtendVectorInReg(A, dl, ExVT); + BLo = DAG.getSignExtendVectorInReg(B, dl, ExVT); } else { const int ShufMask[] = {-1, 0, -1, 1, -1, 2, -1, 3, -1, 4, -1, 5, -1, 6, -1, 7}; @@ -20462,8 +21111,8 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget, -1, -1, -1, -1, -1, -1, -1, -1}; AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask); BHi = DAG.getVectorShuffle(VT, dl, B, B, ShufMask); - AHi = DAG.getNode(X86ISD::VSEXT, dl, ExVT, AHi); - BHi = DAG.getNode(X86ISD::VSEXT, dl, ExVT, BHi); + AHi = DAG.getSignExtendVectorInReg(AHi, dl, ExVT); + BHi = DAG.getSignExtendVectorInReg(BHi, dl, ExVT); } else { const int ShufMask[] = {-1, 8, -1, 9, -1, 10, -1, 11, -1, 12, -1, 13, -1, 14, -1, 15}; @@ -20527,13 +21176,12 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget, // AloBlo = pmuludq(a, b); // AloBhi = pmuludq(a, Bhi); // AhiBlo = pmuludq(Ahi, b); - - // AloBhi = psllqi(AloBhi, 32); - // AhiBlo = psllqi(AhiBlo, 32); - // return AloBlo + AloBhi + AhiBlo; + // + // Hi = psllqi(AloBhi + AhiBlo, 32); + // return AloBlo + Hi; APInt LowerBitsMask = APInt::getLowBitsSet(64, 32); - bool ALoiIsZero = DAG.MaskedValueIsZero(A, LowerBitsMask); - bool BLoiIsZero = DAG.MaskedValueIsZero(B, LowerBitsMask); + bool ALoIsZero = DAG.MaskedValueIsZero(A, LowerBitsMask); + bool BLoIsZero = DAG.MaskedValueIsZero(B, LowerBitsMask); APInt UpperBitsMask = APInt::getHighBitsSet(64, 32); bool AHiIsZero = DAG.MaskedValueIsZero(A, UpperBitsMask); @@ -20543,29 +21191,31 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget, SDValue Alo = DAG.getBitcast(MulVT, A); SDValue Blo = DAG.getBitcast(MulVT, B); - SDValue Res; + SDValue Zero = getZeroVector(VT, Subtarget, DAG, dl); // Only multiply lo/hi halves that aren't known to be zero. - if (!ALoiIsZero && !BLoiIsZero) - Res = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Alo, Blo); + SDValue AloBlo = Zero; + if (!ALoIsZero && !BLoIsZero) + AloBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Alo, Blo); - if (!ALoiIsZero && !BHiIsZero) { + SDValue AloBhi = Zero; + if (!ALoIsZero && !BHiIsZero) { SDValue Bhi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, B, 32, DAG); Bhi = DAG.getBitcast(MulVT, Bhi); - SDValue AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Alo, Bhi); - AloBhi = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, AloBhi, 32, DAG); - Res = (Res.getNode() ? 
DAG.getNode(ISD::ADD, dl, VT, Res, AloBhi) : AloBhi); + AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Alo, Bhi); } - if (!AHiIsZero && !BLoiIsZero) { + SDValue AhiBlo = Zero; + if (!AHiIsZero && !BLoIsZero) { SDValue Ahi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, A, 32, DAG); Ahi = DAG.getBitcast(MulVT, Ahi); - SDValue AhiBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Ahi, Blo); - AhiBlo = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, AhiBlo, 32, DAG); - Res = (Res.getNode() ? DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo) : AhiBlo); + AhiBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Ahi, Blo); } - return (Res.getNode() ? Res : getZeroVector(VT, Subtarget, DAG, dl)); + SDValue Hi = DAG.getNode(ISD::ADD, dl, VT, AloBhi, AhiBlo); + Hi = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Hi, 32, DAG); + + return DAG.getNode(ISD::ADD, dl, VT, AloBlo, Hi); } static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget, @@ -20623,8 +21273,8 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget, DAG.getVectorShuffle(MVT::v16i16, dl, Lo, Hi, HiMask)); } - SDValue ExA = DAG.getNode(ExSSE41, dl, MVT::v16i16, A); - SDValue ExB = DAG.getNode(ExSSE41, dl, MVT::v16i16, B); + SDValue ExA = getExtendInVec(ExSSE41, dl, MVT::v16i16, A, DAG); + SDValue ExB = getExtendInVec(ExSSE41, dl, MVT::v16i16, B, DAG); SDValue Mul = DAG.getNode(ISD::MUL, dl, MVT::v16i16, ExA, ExB); SDValue MulH = DAG.getNode(ISD::SRL, dl, MVT::v16i16, Mul, DAG.getConstant(8, dl, MVT::v16i16)); @@ -20640,8 +21290,8 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget, // Extract the lo parts and zero/sign extend to i16. SDValue ALo, BLo; if (Subtarget.hasSSE41()) { - ALo = DAG.getNode(ExSSE41, dl, ExVT, A); - BLo = DAG.getNode(ExSSE41, dl, ExVT, B); + ALo = getExtendInVec(ExSSE41, dl, ExVT, A, DAG); + BLo = getExtendInVec(ExSSE41, dl, ExVT, B, DAG); } else { const int ShufMask[] = {-1, 0, -1, 1, -1, 2, -1, 3, -1, 4, -1, 5, -1, 6, -1, 7}; @@ -20660,8 +21310,8 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget, -1, -1, -1, -1, -1, -1, -1, -1}; AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask); BHi = DAG.getVectorShuffle(VT, dl, B, B, ShufMask); - AHi = DAG.getNode(ExSSE41, dl, ExVT, AHi); - BHi = DAG.getNode(ExSSE41, dl, ExVT, BHi); + AHi = getExtendInVec(ExSSE41, dl, ExVT, AHi, DAG); + BHi = getExtendInVec(ExSSE41, dl, ExVT, BHi, DAG); } else { const int ShufMask[] = {-1, 8, -1, 9, -1, 10, -1, 11, -1, 12, -1, 13, -1, 14, -1, 15}; @@ -20843,7 +21493,7 @@ static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget &Subtarget, bool LShift = VT.is128BitVector() || (VT.is256BitVector() && Subtarget.hasInt256()); - bool AShift = LShift && (Subtarget.hasVLX() || + bool AShift = LShift && (Subtarget.hasAVX512() || (VT != MVT::v2i64 && VT != MVT::v4i64)); return (Opcode == ISD::SRA) ? 
AShift : LShift; } @@ -20868,7 +21518,7 @@ static bool SupportedVectorVarShift(MVT VT, const X86Subtarget &Subtarget, if (VT.getScalarSizeInBits() == 16 && !Subtarget.hasBWI()) return false; - if (VT.is512BitVector() || Subtarget.hasVLX()) + if (Subtarget.hasAVX512()) return true; bool LShift = VT.is128BitVector() || VT.is256BitVector(); @@ -21104,7 +21754,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, else if (EltVT.bitsLT(MVT::i32)) BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt); - return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, DAG); + return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, Subtarget, DAG); } } @@ -21350,15 +22000,44 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget, return DAG.getVectorShuffle(VT, dl, R02, R13, {0, 5, 2, 7}); } + // It's worth extending once and using the vXi16/vXi32 shifts for smaller + // types, but without AVX512 the extra overheads to get from vXi8 to vXi32 + // make the existing SSE solution better. + if ((Subtarget.hasInt256() && VT == MVT::v8i16) || + (Subtarget.hasAVX512() && VT == MVT::v16i16) || + (Subtarget.hasAVX512() && VT == MVT::v16i8) || + (Subtarget.hasBWI() && VT == MVT::v32i8)) { + MVT EvtSVT = (VT == MVT::v32i8 ? MVT::i16 : MVT::i32); + MVT ExtVT = MVT::getVectorVT(EvtSVT, VT.getVectorNumElements()); + unsigned ExtOpc = + Op.getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + R = DAG.getNode(ExtOpc, dl, ExtVT, R); + Amt = DAG.getNode(ISD::ANY_EXTEND, dl, ExtVT, Amt); + return DAG.getNode(ISD::TRUNCATE, dl, VT, + DAG.getNode(Op.getOpcode(), dl, ExtVT, R, Amt)); + } + if (VT == MVT::v16i8 || - (VT == MVT::v32i8 && Subtarget.hasInt256() && !Subtarget.hasXOP())) { + (VT == MVT::v32i8 && Subtarget.hasInt256() && !Subtarget.hasXOP()) || + (VT == MVT::v64i8 && Subtarget.hasBWI())) { MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2); unsigned ShiftOpcode = Op->getOpcode(); auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) { - // On SSE41 targets we make use of the fact that VSELECT lowers - // to PBLENDVB which selects bytes based just on the sign bit. - if (Subtarget.hasSSE41()) { + if (VT.is512BitVector()) { + // On AVX512BW targets we make use of the fact that VSELECT lowers + // to a masked blend which selects bytes based just on the sign bit + // extracted to a mask. + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); + V0 = DAG.getBitcast(VT, V0); + V1 = DAG.getBitcast(VT, V1); + Sel = DAG.getBitcast(VT, Sel); + Sel = DAG.getNode(X86ISD::CVT2MASK, dl, MaskVT, Sel); + return DAG.getBitcast(SelVT, + DAG.getNode(ISD::VSELECT, dl, VT, Sel, V0, V1)); + } else if (Subtarget.hasSSE41()) { + // On SSE41 targets we make use of the fact that VSELECT lowers + // to PBLENDVB which selects bytes based just on the sign bit. V0 = DAG.getBitcast(VT, V0); V1 = DAG.getBitcast(VT, V1); Sel = DAG.getBitcast(VT, Sel); @@ -21458,19 +22137,6 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget, } } - // It's worth extending once and using the v8i32 shifts for 16-bit types, but - // the extra overheads to get from v16i8 to v8i32 make the existing SSE - // solution better. - if (Subtarget.hasInt256() && VT == MVT::v8i16) { - MVT ExtVT = MVT::v8i32; - unsigned ExtOpc = - Op.getOpcode() == ISD::SRA ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
-    R = DAG.getNode(ExtOpc, dl, ExtVT, R);
-    Amt = DAG.getNode(ISD::ANY_EXTEND, dl, ExtVT, Amt);
-    return DAG.getNode(ISD::TRUNCATE, dl, VT,
-                       DAG.getNode(Op.getOpcode(), dl, ExtVT, R, Amt));
-  }
-
   if (Subtarget.hasInt256() && !Subtarget.hasXOP() && VT == MVT::v16i16) {
     MVT ExtVT = MVT::v8i32;
     SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
@@ -21613,10 +22279,10 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
     // A subtract of one will be selected as a INC. Note that INC doesn't
     // set CF, so we can't do this for UADDO.
     if (isOneConstant(RHS)) {
-          BaseOp = X86ISD::INC;
-          Cond = X86::COND_O;
-          break;
-        }
+      BaseOp = X86ISD::INC;
+      Cond = X86::COND_O;
+      break;
+    }
     BaseOp = X86ISD::ADD;
     Cond = X86::COND_O;
     break;
@@ -21628,10 +22294,10 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
     // A subtract of one will be selected as a DEC. Note that DEC doesn't
     // set CF, so we can't do this for USUBO.
     if (isOneConstant(RHS)) {
-          BaseOp = X86ISD::DEC;
-          Cond = X86::COND_O;
-          break;
-        }
+      BaseOp = X86ISD::DEC;
+      Cond = X86::COND_O;
+      break;
+    }
     BaseOp = X86ISD::SUB;
     Cond = X86::COND_O;
     break;
@@ -22835,7 +23501,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
   case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op,Subtarget,DAG);
   case ISD::INSERT_SUBVECTOR:  return LowerINSERT_SUBVECTOR(Op, Subtarget,DAG);
-  case ISD::SCALAR_TO_VECTOR:  return LowerSCALAR_TO_VECTOR(Op, DAG);
+  case ISD::SCALAR_TO_VECTOR:  return LowerSCALAR_TO_VECTOR(Op, Subtarget,DAG);
   case ISD::ConstantPool:      return LowerConstantPool(Op, DAG);
   case ISD::GlobalAddress:     return LowerGlobalAddress(Op, DAG);
   case ISD::GlobalTLSAddress:  return LowerGlobalTLSAddress(Op, DAG);
@@ -22911,8 +23577,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::ADDE:
  case ISD::SUBC:
  case ISD::SUBE:              return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
-  case ISD::ADD:               return LowerADD(Op, DAG);
-  case ISD::SUB:               return LowerSUB(Op, DAG);
+  case ISD::ADD:
+  case ISD::SUB:               return LowerADD_SUB(Op, DAG);
  case ISD::SMAX:
  case ISD::SMIN:
  case ISD::UMAX:
@@ -23319,7 +23985,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
  case X86ISD::INSERTPS:       return "X86ISD::INSERTPS";
  case X86ISD::PINSRB:         return "X86ISD::PINSRB";
  case X86ISD::PINSRW:         return "X86ISD::PINSRW";
-  case X86ISD::MMX_PINSRW:     return "X86ISD::MMX_PINSRW";
  case X86ISD::PSHUFB:         return "X86ISD::PSHUFB";
  case X86ISD::ANDNP:          return "X86ISD::ANDNP";
  case X86ISD::BLENDI:         return "X86ISD::BLENDI";
@@ -23333,13 +23998,17 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
  case X86ISD::ABS:            return "X86ISD::ABS";
  case X86ISD::CONFLICT:       return "X86ISD::CONFLICT";
  case X86ISD::FMAX:           return "X86ISD::FMAX";
+  case X86ISD::FMAXS:          return "X86ISD::FMAXS";
  case X86ISD::FMAX_RND:       return "X86ISD::FMAX_RND";
+  case X86ISD::FMAXS_RND:      return "X86ISD::FMAXS_RND";
  case X86ISD::FMIN:           return "X86ISD::FMIN";
+  case X86ISD::FMINS:          return "X86ISD::FMINS";
  case X86ISD::FMIN_RND:       return "X86ISD::FMIN_RND";
+  case X86ISD::FMINS_RND:      return "X86ISD::FMINS_RND";
  case X86ISD::FMAXC:          return "X86ISD::FMAXC";
  case X86ISD::FMINC:          return "X86ISD::FMINC";
  case X86ISD::FRSQRT:         return "X86ISD::FRSQRT";
-  case X86ISD::FRSQRTS:         return "X86ISD::FRSQRTS";
+  case X86ISD::FRSQRTS:        return "X86ISD::FRSQRTS";
  case X86ISD::FRCP:           return "X86ISD::FRCP";
  case X86ISD::FRCPS:          return "X86ISD::FRCPS";
  case
X86ISD::EXTRQI: return "X86ISD::EXTRQI"; @@ -23374,7 +24043,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::VTRUNC: return "X86ISD::VTRUNC"; case X86ISD::VTRUNCS: return "X86ISD::VTRUNCS"; case X86ISD::VTRUNCUS: return "X86ISD::VTRUNCUS"; - case X86ISD::VINSERT: return "X86ISD::VINSERT"; + case X86ISD::VTRUNCSTORES: return "X86ISD::VTRUNCSTORES"; + case X86ISD::VTRUNCSTOREUS: return "X86ISD::VTRUNCSTOREUS"; + case X86ISD::VMTRUNCSTORES: return "X86ISD::VMTRUNCSTORES"; + case X86ISD::VMTRUNCSTOREUS: return "X86ISD::VMTRUNCSTOREUS"; case X86ISD::VFPEXT: return "X86ISD::VFPEXT"; case X86ISD::VFPEXT_RND: return "X86ISD::VFPEXT_RND"; case X86ISD::VFPEXTS_RND: return "X86ISD::VFPEXTS_RND"; @@ -23423,6 +24095,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::TESTNM: return "X86ISD::TESTNM"; case X86ISD::KORTEST: return "X86ISD::KORTEST"; case X86ISD::KTEST: return "X86ISD::KTEST"; + case X86ISD::KSHIFTL: return "X86ISD::KSHIFTL"; + case X86ISD::KSHIFTR: return "X86ISD::KSHIFTR"; case X86ISD::PACKSS: return "X86ISD::PACKSS"; case X86ISD::PACKUS: return "X86ISD::PACKUS"; case X86ISD::PALIGNR: return "X86ISD::PALIGNR"; @@ -23523,9 +24197,13 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::RSQRT28: return "X86ISD::RSQRT28"; case X86ISD::RSQRT28S: return "X86ISD::RSQRT28S"; case X86ISD::FADD_RND: return "X86ISD::FADD_RND"; + case X86ISD::FADDS_RND: return "X86ISD::FADDS_RND"; case X86ISD::FSUB_RND: return "X86ISD::FSUB_RND"; + case X86ISD::FSUBS_RND: return "X86ISD::FSUBS_RND"; case X86ISD::FMUL_RND: return "X86ISD::FMUL_RND"; + case X86ISD::FMULS_RND: return "X86ISD::FMULS_RND"; case X86ISD::FDIV_RND: return "X86ISD::FDIV_RND"; + case X86ISD::FDIVS_RND: return "X86ISD::FDIVS_RND"; case X86ISD::FSQRT_RND: return "X86ISD::FSQRT_RND"; case X86ISD::FSQRTS_RND: return "X86ISD::FSQRTS_RND"; case X86ISD::FGETEXP_RND: return "X86ISD::FGETEXP_RND"; @@ -23849,7 +24527,7 @@ static MachineBasicBlock *emitPCMPSTRM(MachineInstr &MI, MachineBasicBlock *BB, for (unsigned i = 1; i < NumArgs; ++i) { MachineOperand &Op = MI.getOperand(i); if (!(Op.isReg() && Op.isImplicit())) - MIB.addOperand(Op); + MIB.add(Op); } if (MI.hasOneMemOperand()) MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); @@ -23885,7 +24563,7 @@ static MachineBasicBlock *emitPCMPSTRI(MachineInstr &MI, MachineBasicBlock *BB, for (unsigned i = 1; i < NumArgs; ++i) { MachineOperand &Op = MI.getOperand(i); if (!(Op.isReg() && Op.isImplicit())) - MIB.addOperand(Op); + MIB.add(Op); } if (MI.hasOneMemOperand()) MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); @@ -23945,7 +24623,7 @@ static MachineBasicBlock *emitMonitor(MachineInstr &MI, MachineBasicBlock *BB, unsigned MemReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX; MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg); for (int i = 0; i < X86::AddrNumOperands; ++i) - MIB.addOperand(MI.getOperand(i)); + MIB.add(MI.getOperand(i)); unsigned ValOps = X86::AddrNumOperands; BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX) @@ -23960,6 +24638,26 @@ static MachineBasicBlock *emitMonitor(MachineInstr &MI, MachineBasicBlock *BB, return BB; } +static MachineBasicBlock *emitClzero(MachineInstr *MI, MachineBasicBlock *BB, + const X86Subtarget &Subtarget) { + DebugLoc dl = MI->getDebugLoc(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + // Address into RAX/EAX + unsigned MemOpc = Subtarget.is64Bit() ? 
X86::LEA64r : X86::LEA32r; + unsigned MemReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX; + MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg); + for (int i = 0; i < X86::AddrNumOperands; ++i) + MIB.add(MI->getOperand(i)); + + // The instruction doesn't actually take any operands though. + BuildMI(*BB, MI, dl, TII->get(X86::CLZEROr)); + + MI->eraseFromParent(); // The pseudo is gone now. + return BB; +} + + + MachineBasicBlock * X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const { @@ -24083,12 +24781,12 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI, // Load the offset value into a register OffsetReg = MRI.createVirtualRegister(OffsetRegClass); BuildMI(thisMBB, DL, TII->get(X86::MOV32rm), OffsetReg) - .addOperand(Base) - .addOperand(Scale) - .addOperand(Index) - .addDisp(Disp, UseFPOffset ? 4 : 0) - .addOperand(Segment) - .setMemRefs(MMOBegin, MMOEnd); + .add(Base) + .add(Scale) + .add(Index) + .addDisp(Disp, UseFPOffset ? 4 : 0) + .add(Segment) + .setMemRefs(MMOBegin, MMOEnd); // Check if there is enough room left to pull this argument. BuildMI(thisMBB, DL, TII->get(X86::CMP32ri)) @@ -24108,12 +24806,12 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI, // Read the reg_save_area address. unsigned RegSaveReg = MRI.createVirtualRegister(AddrRegClass); BuildMI(offsetMBB, DL, TII->get(X86::MOV64rm), RegSaveReg) - .addOperand(Base) - .addOperand(Scale) - .addOperand(Index) - .addDisp(Disp, 16) - .addOperand(Segment) - .setMemRefs(MMOBegin, MMOEnd); + .add(Base) + .add(Scale) + .add(Index) + .addDisp(Disp, 16) + .add(Segment) + .setMemRefs(MMOBegin, MMOEnd); // Zero-extend the offset unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass); @@ -24135,13 +24833,13 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI, // Store it back into the va_list. BuildMI(offsetMBB, DL, TII->get(X86::MOV32mr)) - .addOperand(Base) - .addOperand(Scale) - .addOperand(Index) - .addDisp(Disp, UseFPOffset ? 4 : 0) - .addOperand(Segment) - .addReg(NextOffsetReg) - .setMemRefs(MMOBegin, MMOEnd); + .add(Base) + .add(Scale) + .add(Index) + .addDisp(Disp, UseFPOffset ? 4 : 0) + .add(Segment) + .addReg(NextOffsetReg) + .setMemRefs(MMOBegin, MMOEnd); // Jump to endMBB BuildMI(offsetMBB, DL, TII->get(X86::JMP_1)) @@ -24155,12 +24853,12 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI, // Load the overflow_area address into a register. unsigned OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass); BuildMI(overflowMBB, DL, TII->get(X86::MOV64rm), OverflowAddrReg) - .addOperand(Base) - .addOperand(Scale) - .addOperand(Index) - .addDisp(Disp, 8) - .addOperand(Segment) - .setMemRefs(MMOBegin, MMOEnd); + .add(Base) + .add(Scale) + .add(Index) + .addDisp(Disp, 8) + .add(Segment) + .setMemRefs(MMOBegin, MMOEnd); // If we need to align it, do so. Otherwise, just copy the address // to OverflowDestReg. @@ -24191,13 +24889,13 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI, // Store the new overflow address. BuildMI(overflowMBB, DL, TII->get(X86::MOV64mr)) - .addOperand(Base) - .addOperand(Scale) - .addOperand(Index) - .addDisp(Disp, 8) - .addOperand(Segment) - .addReg(NextAddrReg) - .setMemRefs(MMOBegin, MMOEnd); + .add(Base) + .add(Scale) + .add(Index) + .addDisp(Disp, 8) + .add(Segment) + .addReg(NextAddrReg) + .setMemRefs(MMOBegin, MMOEnd); // If we branched, emit the PHI to the front of endMBB. 
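
// A standalone sketch (hypothetical code, not part of this patch or of LLVM)
// of the SysV x86-64 va_arg logic that EmitVAARG64WithCustomInserter above
// materializes as machine IR: try the register save area first, then fall
// back to the overflow area. The field displacements (0/4/8/16) correspond
// to the addDisp(Disp, ...) offsets in the builder calls; the 48-byte bound
// (6 GP registers * 8 bytes) is the ABI's GP register-save size, and only
// the general-purpose path is shown.
#include <cstdint>

struct VaList64 {
  uint32_t GpOffset;      // disp 0: next general-purpose register slot
  uint32_t FpOffset;      // disp 4: next floating-point register slot
  void *OverflowArgArea;  // disp 8: arguments passed on the stack
  void *RegSaveArea;      // disp 16: spilled argument registers
};

static void *vaArgGP(VaList64 &L) {
  if (L.GpOffset <= 48 - 8) {  // room left in the register save area?
    void *P = (char *)L.RegSaveArea + L.GpOffset;
    L.GpOffset += 8;           // bump the offset and store it back
    return P;
  }
  void *P = L.OverflowArgArea; // otherwise read from the overflow area
  L.OverflowArgArea = (char *)L.OverflowArgArea + 8;
  return P;
}

int main() {
  uint64_t Regs[6] = {1, 2, 3, 4, 5, 6};
  uint64_t Stack[2] = {7, 8};
  VaList64 L{0, 48, Stack, Regs};
  uint64_t Sum = 0;
  for (int i = 0; i < 8; ++i)
    Sum += *(uint64_t *)vaArgGP(L);  // 1..6 from Regs, then 7, 8 from Stack
  return Sum == 36 ? 0 : 1;
}
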
if (offsetMBB) { @@ -24670,12 +25368,12 @@ X86TargetLowering::EmitLoweredAtomicFP(MachineInstr &MI, // instruction using the same address operands. if (Operand.isReg()) Operand.setIsKill(false); - MIB.addOperand(Operand); + MIB.add(Operand); } MachineInstr *FOpMI = MIB; MIB = BuildMI(*BB, MI, DL, TII->get(MOp)); for (int i = 0; i < X86::AddrNumOperands; ++i) - MIB.addOperand(MI.getOperand(i)); + MIB.add(MI.getOperand(i)); MIB.addReg(FOpMI->getOperand(0).getReg(), RegState::Kill); MI.eraseFromParent(); // The pseudo instruction is gone now. return BB; @@ -25055,7 +25753,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, if (i == X86::AddrDisp) MIB.addDisp(MI.getOperand(MemOpndSlot + i), LabelOffset); else - MIB.addOperand(MI.getOperand(MemOpndSlot + i)); + MIB.add(MI.getOperand(MemOpndSlot + i)); } if (!UseImmLabel) MIB.addReg(LabelReg); @@ -25138,7 +25836,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, // Reload FP MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), FP); for (unsigned i = 0; i < X86::AddrNumOperands; ++i) - MIB.addOperand(MI.getOperand(i)); + MIB.add(MI.getOperand(i)); MIB.setMemRefs(MMOBegin, MMOEnd); // Reload IP MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), Tmp); @@ -25146,7 +25844,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, if (i == X86::AddrDisp) MIB.addDisp(MI.getOperand(i), LabelOffset); else - MIB.addOperand(MI.getOperand(i)); + MIB.add(MI.getOperand(i)); } MIB.setMemRefs(MMOBegin, MMOEnd); // Reload SP @@ -25155,7 +25853,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, if (i == X86::AddrDisp) MIB.addDisp(MI.getOperand(i), SPOffset); else - MIB.addOperand(MI.getOperand(i)); + MIB.add(MI.getOperand(i)); } MIB.setMemRefs(MMOBegin, MMOEnd); // Jump @@ -25580,6 +26278,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, return emitMonitor(MI, BB, Subtarget, X86::MONITORrrr); case X86::MONITORX: return emitMonitor(MI, BB, Subtarget, X86::MONITORXrrr); + + // Cache line zero + case X86::CLZERO: + return emitClzero(&MI, BB, Subtarget); + // PKU feature case X86::WRPKRU: return emitWRPKRU(MI, BB, Subtarget); @@ -25714,25 +26417,28 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, break; LLVM_FALLTHROUGH; case X86ISD::SETCC: - KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); + KnownZero.setBits(1, BitWidth); break; case X86ISD::MOVMSK: { unsigned NumLoBits = Op.getOperand(0).getValueType().getVectorNumElements(); - KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - NumLoBits); + KnownZero.setBits(NumLoBits, BitWidth); break; } case X86ISD::VZEXT: { SDValue N0 = Op.getOperand(0); unsigned NumElts = Op.getValueType().getVectorNumElements(); - unsigned InNumElts = N0.getValueType().getVectorNumElements(); - unsigned InBitWidth = N0.getValueType().getScalarSizeInBits(); + + EVT SrcVT = N0.getValueType(); + unsigned InNumElts = SrcVT.getVectorNumElements(); + unsigned InBitWidth = SrcVT.getScalarSizeInBits(); + assert(InNumElts >= NumElts && "Illegal VZEXT input"); KnownZero = KnownOne = APInt(InBitWidth, 0); - APInt DemandedElts = APInt::getLowBitsSet(InNumElts, NumElts); - DAG.computeKnownBits(N0, KnownZero, KnownOne, DemandedElts, Depth + 1); + APInt DemandedSrcElts = APInt::getLowBitsSet(InNumElts, NumElts); + DAG.computeKnownBits(N0, KnownZero, KnownOne, DemandedSrcElts, Depth + 1); KnownOne = KnownOne.zext(BitWidth); KnownZero = KnownZero.zext(BitWidth); - KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - InBitWidth); + KnownZero.setBits(InBitWidth, BitWidth); 
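
// A standalone illustration (hypothetical code, not part of this patch or of
// LLVM) of the known-bits bookkeeping above: after zero-extending from
// InBitWidth to BitWidth, every bit at position >= InBitWidth is known zero,
// which is exactly what KnownZero.setBits(InBitWidth, BitWidth) records (and
// likewise setBits(NumLoBits, BitWidth) for MOVMSK, setBits(1, BitWidth) for
// SETCC).
#include <cassert>
#include <cstdint>

static uint64_t knownZeroAfterZext(unsigned InBits, unsigned OutBits) {
  // Ones in [InBits, OutBits): the result bits guaranteed to be zero.
  uint64_t OutMask = (OutBits == 64) ? ~0ULL : ((1ULL << OutBits) - 1);
  uint64_t InMask = (InBits == 64) ? ~0ULL : ((1ULL << InBits) - 1);
  return OutMask & ~InMask;
}

int main() {
  assert(knownZeroAfterZext(8, 32) == 0xFFFFFF00ULL);  // i8 zext to i32
  assert(knownZeroAfterZext(4, 32) == 0xFFFFFFF0ULL);  // 4-lane MOVMSK into i32
  return 0;
}
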
break; } } @@ -25775,25 +26481,17 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N, // instructions. // TODO: Investigate sharing more of this with shuffle lowering. static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef Mask, + bool AllowFloatDomain, bool AllowIntDomain, + SDValue &V1, SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget, unsigned &Shuffle, MVT &SrcVT, MVT &DstVT) { unsigned NumMaskElts = Mask.size(); unsigned MaskEltSize = MaskVT.getScalarSizeInBits(); - bool FloatDomain = MaskVT.isFloatingPoint() || - (!Subtarget.hasAVX2() && MaskVT.is256BitVector()); - - // Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS). - if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2())) && - isUndefOrEqual(Mask[0], 0) && - isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) { - Shuffle = X86ISD::VZEXT_MOVL; - SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT; - return true; - } - // Match against a VZEXT instruction. - // TODO: Add 256/512-bit vector support. - if (!FloatDomain && MaskVT.is128BitVector() && Subtarget.hasSSE41()) { + // Match against a ZERO_EXTEND_VECTOR_INREG/VZEXT instruction. + // TODO: Add 512-bit vector support (split AVX512F and AVX512BW). + if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE41()) || + (MaskVT.is256BitVector() && Subtarget.hasInt256()))) { unsigned MaxScale = 64 / MaskEltSize; for (unsigned Scale = 2; Scale <= MaxScale; Scale *= 2) { bool Match = true; @@ -25803,19 +26501,32 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef Mask, Match &= isUndefOrZeroInRange(Mask, (i * Scale) + 1, Scale - 1); } if (Match) { - SrcVT = MaskVT; + unsigned SrcSize = std::max(128u, NumDstElts * MaskEltSize); + SrcVT = MVT::getVectorVT(MaskVT.getScalarType(), SrcSize / MaskEltSize); + if (SrcVT != MaskVT) + V1 = extractSubVector(V1, 0, DAG, DL, SrcSize); DstVT = MVT::getIntegerVT(Scale * MaskEltSize); DstVT = MVT::getVectorVT(DstVT, NumDstElts); - Shuffle = X86ISD::VZEXT; + Shuffle = SrcVT != MaskVT ? unsigned(X86ISD::VZEXT) + : unsigned(ISD::ZERO_EXTEND_VECTOR_INREG); return true; } } } + // Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS). + if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2())) && + isUndefOrEqual(Mask[0], 0) && + isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) { + Shuffle = X86ISD::VZEXT_MOVL; + SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT; + return true; + } + // Check if we have SSE3 which will let us use MOVDDUP etc. The // instructions are no slower than UNPCKLPD but has the option to // fold the input operand into even an unaligned memory load. 
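
// A standalone sketch (hypothetical code, not part of this patch or of LLVM)
// of the zero-extension shuffle pattern matched in the scale loop above: at
// scale Scale, element i*Scale must read source element i and the following
// Scale-1 elements must be zero or undef, e.g. {0, Z, 1, Z} is a 2x zext.
#include <cassert>
#include <vector>

static const int SM_Zero = -2;  // stand-ins for SM_SentinelZero / SM_SentinelUndef
static const int SM_Undef = -1;

static bool isZExtMask(const std::vector<int> &Mask, unsigned Scale) {
  if (Scale < 2 || Mask.size() % Scale != 0)
    return false;
  for (unsigned i = 0, e = Mask.size() / Scale; i != e; ++i) {
    int M = Mask[i * Scale];
    if (M != SM_Undef && M != (int)i)
      return false;  // low part must come from source element i
    for (unsigned j = 1; j != Scale; ++j) {
      int Z = Mask[i * Scale + j];
      if (Z != SM_Undef && Z != SM_Zero)
        return false;  // extended part must be zero (or undef)
    }
  }
  return true;
}

int main() {
  assert(isZExtMask({0, SM_Zero, 1, SM_Zero}, 2));  // pmovzx-style mask
  assert(!isZExtMask({0, 1, 2, 3}, 2));             // identity is not a zext
  return 0;
}
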
- if (MaskVT.is128BitVector() && Subtarget.hasSSE3() && FloatDomain) { + if (MaskVT.is128BitVector() && Subtarget.hasSSE3() && AllowFloatDomain) { if (isTargetShuffleEquivalent(Mask, {0, 0})) { Shuffle = X86ISD::MOVDDUP; SrcVT = DstVT = MVT::v2f64; @@ -25833,7 +26544,7 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef Mask, } } - if (MaskVT.is256BitVector() && FloatDomain) { + if (MaskVT.is256BitVector() && AllowFloatDomain) { assert(Subtarget.hasAVX() && "AVX required for 256-bit vector shuffles"); if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2})) { Shuffle = X86ISD::MOVDDUP; @@ -25852,7 +26563,7 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef Mask, } } - if (MaskVT.is512BitVector() && FloatDomain) { + if (MaskVT.is512BitVector() && AllowFloatDomain) { assert(Subtarget.hasAVX512() && "AVX512 required for 512-bit vector shuffles"); if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2, 4, 4, 6, 6})) { @@ -25891,24 +26602,26 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef Mask, // permute instructions. // TODO: Investigate sharing more of this with shuffle lowering. static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef Mask, + bool AllowFloatDomain, + bool AllowIntDomain, const X86Subtarget &Subtarget, unsigned &Shuffle, MVT &ShuffleVT, unsigned &PermuteImm) { unsigned NumMaskElts = Mask.size(); - bool FloatDomain = MaskVT.isFloatingPoint(); bool ContainsZeros = false; - SmallBitVector Zeroable(NumMaskElts, false); + APInt Zeroable(NumMaskElts, false); for (unsigned i = 0; i != NumMaskElts; ++i) { int M = Mask[i]; - Zeroable[i] = isUndefOrZero(M); + if (isUndefOrZero(M)) + Zeroable.setBit(i); ContainsZeros |= (M == SM_SentinelZero); } // Attempt to match against byte/bit shifts. // FIXME: Add 512-bit support. - if (!FloatDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) || - (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) { + if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) || + (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) { int ShiftAmt = matchVectorShuffleAsShift(ShuffleVT, Shuffle, MaskVT.getScalarSizeInBits(), Mask, 0, Zeroable, Subtarget); @@ -25971,19 +26684,21 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef Mask, // AVX introduced the VPERMILPD/VPERMILPS float permutes, before then we // had to use 2-input SHUFPD/SHUFPS shuffles (not handled here). - if (FloatDomain && !Subtarget.hasAVX()) + if ((AllowFloatDomain && !AllowIntDomain) && !Subtarget.hasAVX()) return false; // Pre-AVX2 we must use float shuffles on 256-bit vectors. - if (MaskVT.is256BitVector() && !Subtarget.hasAVX2()) - FloatDomain = true; + if (MaskVT.is256BitVector() && !Subtarget.hasAVX2()) { + AllowFloatDomain = true; + AllowIntDomain = false; + } // Check for lane crossing permutes. if (is128BitLaneCrossingShuffleMask(MaskEltVT, Mask)) { // PERMPD/PERMQ permutes within a 256-bit vector (AVX2+). if (Subtarget.hasAVX2() && MaskVT.is256BitVector() && Mask.size() == 4) { Shuffle = X86ISD::VPERMI; - ShuffleVT = (FloatDomain ? MVT::v4f64 : MVT::v4i64); + ShuffleVT = (AllowFloatDomain ? MVT::v4f64 : MVT::v4i64); PermuteImm = getV4X86ShuffleImm(Mask); return true; } @@ -25991,7 +26706,7 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef Mask, SmallVector RepeatedMask; if (is256BitLaneRepeatedShuffleMask(MVT::v8f64, Mask, RepeatedMask)) { Shuffle = X86ISD::VPERMI; - ShuffleVT = (FloatDomain ? MVT::v8f64 : MVT::v8i64); + ShuffleVT = (AllowFloatDomain ? 
MVT::v8f64 : MVT::v8i64); PermuteImm = getV4X86ShuffleImm(RepeatedMask); return true; } @@ -26000,7 +26715,7 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef Mask, } // VPERMILPD can permute with a non-repeating shuffle. - if (FloatDomain && MaskScalarSizeInBits == 64) { + if (AllowFloatDomain && MaskScalarSizeInBits == 64) { Shuffle = X86ISD::VPERMILPI; ShuffleVT = MVT::getVectorVT(MVT::f64, Mask.size()); PermuteImm = 0; @@ -26024,8 +26739,8 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef Mask, if (MaskScalarSizeInBits == 64) scaleShuffleMask(2, RepeatedMask, WordMask); - Shuffle = (FloatDomain ? X86ISD::VPERMILPI : X86ISD::PSHUFD); - ShuffleVT = (FloatDomain ? MVT::f32 : MVT::i32); + Shuffle = (AllowFloatDomain ? X86ISD::VPERMILPI : X86ISD::PSHUFD); + ShuffleVT = (AllowFloatDomain ? MVT::f32 : MVT::i32); ShuffleVT = MVT::getVectorVT(ShuffleVT, InputSizeInBits / 32); PermuteImm = getV4X86ShuffleImm(WordMask); return true; @@ -26035,37 +26750,38 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef Mask, // shuffle instructions. // TODO: Investigate sharing more of this with shuffle lowering. static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef Mask, - SDValue &V1, SDValue &V2, + bool AllowFloatDomain, bool AllowIntDomain, + SDValue &V1, SDValue &V2, SDLoc &DL, + SelectionDAG &DAG, const X86Subtarget &Subtarget, unsigned &Shuffle, MVT &ShuffleVT, bool IsUnary) { - bool FloatDomain = MaskVT.isFloatingPoint(); unsigned EltSizeInBits = MaskVT.getScalarSizeInBits(); if (MaskVT.is128BitVector()) { - if (isTargetShuffleEquivalent(Mask, {0, 0}) && FloatDomain) { + if (isTargetShuffleEquivalent(Mask, {0, 0}) && AllowFloatDomain) { V2 = V1; Shuffle = X86ISD::MOVLHPS; ShuffleVT = MVT::v4f32; return true; } - if (isTargetShuffleEquivalent(Mask, {1, 1}) && FloatDomain) { + if (isTargetShuffleEquivalent(Mask, {1, 1}) && AllowFloatDomain) { V2 = V1; Shuffle = X86ISD::MOVHLPS; ShuffleVT = MVT::v4f32; return true; } - if (isTargetShuffleEquivalent(Mask, {0, 3}) && FloatDomain) { - if (Subtarget.hasSSE2()) { - std::swap(V1, V2); - Shuffle = X86ISD::MOVSD; - ShuffleVT = MVT::v2f64; - return true; - } + if (isTargetShuffleEquivalent(Mask, {0, 3}) && Subtarget.hasSSE2() && + (AllowFloatDomain || !Subtarget.hasSSE41())) { + std::swap(V1, V2); + Shuffle = X86ISD::MOVSD; + ShuffleVT = MaskVT; + return true; } - if (isTargetShuffleEquivalent(Mask, {4, 1, 2, 3}) && FloatDomain) { + if (isTargetShuffleEquivalent(Mask, {4, 1, 2, 3}) && + (AllowFloatDomain || !Subtarget.hasSSE41())) { Shuffle = X86ISD::MOVSS; - ShuffleVT = MVT::v4f32; + ShuffleVT = MaskVT; return true; } } @@ -26076,57 +26792,12 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef Mask, (MaskVT.is256BitVector() && 32 <= EltSizeInBits && Subtarget.hasAVX()) || (MaskVT.is256BitVector() && Subtarget.hasAVX2()) || (MaskVT.is512BitVector() && Subtarget.hasAVX512())) { - MVT LegalVT = MaskVT; - if (LegalVT.is256BitVector() && !Subtarget.hasAVX2()) - LegalVT = (32 == EltSizeInBits ? 
MVT::v8f32 : MVT::v4f64); - - SmallVector Unpckl, Unpckh; - if (IsUnary) { - createUnpackShuffleMask(MaskVT, Unpckl, true, true); - if (isTargetShuffleEquivalent(Mask, Unpckl)) { - V2 = V1; - Shuffle = X86ISD::UNPCKL; - ShuffleVT = LegalVT; - return true; - } - - createUnpackShuffleMask(MaskVT, Unpckh, false, true); - if (isTargetShuffleEquivalent(Mask, Unpckh)) { - V2 = V1; - Shuffle = X86ISD::UNPCKH; - ShuffleVT = LegalVT; - return true; - } - } else { - createUnpackShuffleMask(MaskVT, Unpckl, true, false); - if (isTargetShuffleEquivalent(Mask, Unpckl)) { - Shuffle = X86ISD::UNPCKL; - ShuffleVT = LegalVT; - return true; - } - - createUnpackShuffleMask(MaskVT, Unpckh, false, false); - if (isTargetShuffleEquivalent(Mask, Unpckh)) { - Shuffle = X86ISD::UNPCKH; - ShuffleVT = LegalVT; - return true; - } - - ShuffleVectorSDNode::commuteMask(Unpckl); - if (isTargetShuffleEquivalent(Mask, Unpckl)) { - std::swap(V1, V2); - Shuffle = X86ISD::UNPCKL; - ShuffleVT = LegalVT; - return true; - } - - ShuffleVectorSDNode::commuteMask(Unpckh); - if (isTargetShuffleEquivalent(Mask, Unpckh)) { - std::swap(V1, V2); - Shuffle = X86ISD::UNPCKH; - ShuffleVT = LegalVT; - return true; - } + if (matchVectorShuffleWithUNPCK(MaskVT, V1, V2, Shuffle, IsUnary, Mask, DL, + DAG, Subtarget)) { + ShuffleVT = MaskVT; + if (ShuffleVT.is256BitVector() && !Subtarget.hasAVX2()) + ShuffleVT = (32 == EltSizeInBits ? MVT::v8f32 : MVT::v4f64); + return true; } } @@ -26134,17 +26805,19 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef Mask, } static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef Mask, - SDValue &V1, SDValue &V2, - SDLoc &DL, SelectionDAG &DAG, + bool AllowFloatDomain, + bool AllowIntDomain, + SDValue &V1, SDValue &V2, SDLoc &DL, + SelectionDAG &DAG, const X86Subtarget &Subtarget, unsigned &Shuffle, MVT &ShuffleVT, unsigned &PermuteImm) { unsigned NumMaskElts = Mask.size(); - bool FloatDomain = MaskVT.isFloatingPoint(); + unsigned EltSizeInBits = MaskVT.getScalarSizeInBits(); // Attempt to match against PALIGNR byte rotate. - if (!FloatDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSSE3()) || - (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) { + if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSSE3()) || + (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) { int ByteRotation = matchVectorShuffleAsByteRotate(MaskVT, V1, V2, Mask); if (0 < ByteRotation) { Shuffle = X86ISD::PALIGNR; @@ -26199,10 +26872,11 @@ static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef Mask, int M = Mask[i]; if (M == SM_SentinelUndef) continue; - else if (M == SM_SentinelZero) - MatchBlend = false; - else if ((M != i) && (M != (i + (int)NumMaskElts))) + if ((M == SM_SentinelZero) || + ((M != i) && (M != (i + (int)NumMaskElts)))) { MatchBlend = false; + break; + } } if (MatchBlend) { @@ -26219,13 +26893,14 @@ static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef Mask, } // Attempt to combine to INSERTPS. 
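// INSERTPS encodes its work in one imm8: bits [7:6] select the source lane
// of the second operand, bits [5:4] the destination lane it lands in, and
// bits [3:0] zero out result lanes. A reference model of the immediate the
// matcher below derives (illustrative sketch, not part of this patch):
static void insertpsRef(const float A[4], const float B[4], unsigned Imm,
                        float R[4]) {
  for (int i = 0; i != 4; ++i)
    R[i] = A[i];                           // start from the first source
  R[(Imm >> 4) & 3] = B[(Imm >> 6) & 3];   // insert the selected element
  for (int i = 0; i != 4; ++i)
    if (Imm & (1u << i))
      R[i] = 0.0f;                         // apply the zero mask
}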
- if (Subtarget.hasSSE41() && MaskVT == MVT::v4f32) { - SmallBitVector Zeroable(4, false); + if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.hasSSE41() && + MaskVT.is128BitVector()) { + APInt Zeroable(4, 0); for (unsigned i = 0; i != NumMaskElts; ++i) if (Mask[i] < 0) - Zeroable[i] = true; + Zeroable.setBit(i); - if (Zeroable.any() && + if (Zeroable.getBoolValue() && matchVectorShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) { Shuffle = X86ISD::INSERTPS; ShuffleVT = MVT::v4f32; @@ -26234,16 +26909,64 @@ static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef Mask, } // Attempt to combine to SHUFPD. - if ((MaskVT == MVT::v2f64 && Subtarget.hasSSE2()) || - (MaskVT == MVT::v4f64 && Subtarget.hasAVX()) || - (MaskVT == MVT::v8f64 && Subtarget.hasAVX512())) { + if (AllowFloatDomain && EltSizeInBits == 64 && + ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) || + (MaskVT.is256BitVector() && Subtarget.hasAVX()) || + (MaskVT.is512BitVector() && Subtarget.hasAVX512()))) { if (matchVectorShuffleWithSHUFPD(MaskVT, V1, V2, PermuteImm, Mask)) { Shuffle = X86ISD::SHUFP; - ShuffleVT = MaskVT; + ShuffleVT = MVT::getVectorVT(MVT::f64, MaskVT.getSizeInBits() / 64); return true; } } + // Attempt to combine to SHUFPS. + if (AllowFloatDomain && EltSizeInBits == 32 && + ((MaskVT.is128BitVector() && Subtarget.hasSSE1()) || + (MaskVT.is256BitVector() && Subtarget.hasAVX()) || + (MaskVT.is512BitVector() && Subtarget.hasAVX512()))) { + SmallVector RepeatedMask; + if (isRepeatedTargetShuffleMask(128, MaskVT, Mask, RepeatedMask)) { + // Match each half of the repeated mask, to determine if its just + // referencing one of the vectors, is zeroable or entirely undef. + auto MatchHalf = [&](unsigned Offset, int &S0, int &S1) { + int M0 = RepeatedMask[Offset]; + int M1 = RepeatedMask[Offset + 1]; + + if (isUndefInRange(RepeatedMask, Offset, 2)) { + return DAG.getUNDEF(MaskVT); + } else if (isUndefOrZeroInRange(RepeatedMask, Offset, 2)) { + S0 = (SM_SentinelUndef == M0 ? -1 : 0); + S1 = (SM_SentinelUndef == M1 ? -1 : 1); + return getZeroVector(MaskVT, Subtarget, DAG, DL); + } else if (isUndefOrInRange(M0, 0, 4) && isUndefOrInRange(M1, 0, 4)) { + S0 = (SM_SentinelUndef == M0 ? -1 : M0 & 3); + S1 = (SM_SentinelUndef == M1 ? -1 : M1 & 3); + return V1; + } else if (isUndefOrInRange(M0, 4, 8) && isUndefOrInRange(M1, 4, 8)) { + S0 = (SM_SentinelUndef == M0 ? -1 : M0 & 3); + S1 = (SM_SentinelUndef == M1 ? -1 : M1 & 3); + return V2; + } + + return SDValue(); + }; + + int ShufMask[4] = {-1, -1, -1, -1}; + SDValue Lo = MatchHalf(0, ShufMask[0], ShufMask[1]); + SDValue Hi = MatchHalf(2, ShufMask[2], ShufMask[3]); + + if (Lo && Hi) { + V1 = Lo; + V2 = Hi; + Shuffle = X86ISD::SHUFP; + ShuffleVT = MVT::getVectorVT(MVT::f32, MaskVT.getSizeInBits() / 32); + PermuteImm = getV4X86ShuffleImm(ShufMask); + return true; + } + } + } + return false; } @@ -26350,10 +27073,19 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, : MVT::getIntegerVT(MaskEltSizeInBits); MaskVT = MVT::getVectorVT(MaskVT, NumMaskElts); + // Only allow legal mask types. + if (!DAG.getTargetLoweringInfo().isTypeLegal(MaskVT)) + return false; + // Attempt to match the mask against known shuffle patterns. MVT ShuffleSrcVT, ShuffleVT; unsigned Shuffle, PermuteImm; + // Which shuffle domains are permitted? + // Permit domain crossing at higher combine depths. 
+ bool AllowFloatDomain = FloatDomain || (Depth > 3); + bool AllowIntDomain = !FloatDomain || (Depth > 3); + if (UnaryShuffle) { // If we are shuffling a X86ISD::VZEXT_LOAD then we can use the load // directly if we don't shuffle the lower element and we shuffle the upper @@ -26370,7 +27102,8 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, } } - if (matchUnaryVectorShuffle(MaskVT, Mask, Subtarget, Shuffle, ShuffleSrcVT, + if (matchUnaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, + V1, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT, ShuffleVT)) { if (Depth == 1 && Root.getOpcode() == Shuffle) return false; // Nothing to do! @@ -26385,7 +27118,8 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, return true; } - if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, Subtarget, Shuffle, + if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, AllowFloatDomain, + AllowIntDomain, Subtarget, Shuffle, ShuffleVT, PermuteImm)) { if (Depth == 1 && Root.getOpcode() == Shuffle) return false; // Nothing to do! @@ -26402,8 +27136,9 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, } } - if (matchBinaryVectorShuffle(MaskVT, Mask, V1, V2, Subtarget, Shuffle, - ShuffleVT, UnaryShuffle)) { + if (matchBinaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, + V1, V2, DL, DAG, Subtarget, Shuffle, ShuffleVT, + UnaryShuffle)) { if (Depth == 1 && Root.getOpcode() == Shuffle) return false; // Nothing to do! if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements())) @@ -26419,8 +27154,10 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, return true; } - if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, V1, V2, DL, DAG, Subtarget, - Shuffle, ShuffleVT, PermuteImm)) { + if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, AllowFloatDomain, + AllowIntDomain, V1, V2, DL, DAG, + Subtarget, Shuffle, ShuffleVT, + PermuteImm)) { if (Depth == 1 && Root.getOpcode() == Shuffle) return false; // Nothing to do! if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements())) @@ -26539,12 +27276,12 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, DAG.getTargetLoweringInfo().isTypeLegal(MaskVT)) { APInt Zero = APInt::getNullValue(MaskEltSizeInBits); APInt AllOnes = APInt::getAllOnesValue(MaskEltSizeInBits); - SmallBitVector UndefElts(NumMaskElts, false); + APInt UndefElts(NumMaskElts, 0); SmallVector EltBits(NumMaskElts, Zero); for (unsigned i = 0; i != NumMaskElts; ++i) { int M = Mask[i]; if (M == SM_SentinelUndef) { - UndefElts[i] = true; + UndefElts.setBit(i); continue; } if (M == SM_SentinelZero) @@ -26728,8 +27465,8 @@ static bool combineX86ShufflesConstants(const SmallVectorImpl &Ops, // Extract constant bits from each source op. bool OneUseConstantOp = false; - SmallVector UndefEltsOps(NumOps); - SmallVector, 4> RawBitsOps(NumOps); + SmallVector UndefEltsOps(NumOps); + SmallVector, 16> RawBitsOps(NumOps); for (unsigned i = 0; i != NumOps; ++i) { SDValue SrcOp = Ops[i]; OneUseConstantOp |= SrcOp.hasOneUse(); @@ -26745,18 +27482,18 @@ static bool combineX86ShufflesConstants(const SmallVectorImpl &Ops, return false; // Shuffle the constant bits according to the mask. 
- SmallBitVector UndefElts(NumMaskElts, false); - SmallBitVector ZeroElts(NumMaskElts, false); - SmallBitVector ConstantElts(NumMaskElts, false); + APInt UndefElts(NumMaskElts, 0); + APInt ZeroElts(NumMaskElts, 0); + APInt ConstantElts(NumMaskElts, 0); SmallVector ConstantBitData(NumMaskElts, APInt::getNullValue(MaskSizeInBits)); for (unsigned i = 0; i != NumMaskElts; ++i) { int M = Mask[i]; if (M == SM_SentinelUndef) { - UndefElts[i] = true; + UndefElts.setBit(i); continue; } else if (M == SM_SentinelZero) { - ZeroElts[i] = true; + ZeroElts.setBit(i); continue; } assert(0 <= M && M < (int)(NumMaskElts * NumOps)); @@ -26766,21 +27503,21 @@ static bool combineX86ShufflesConstants(const SmallVectorImpl &Ops, auto &SrcUndefElts = UndefEltsOps[SrcOpIdx]; if (SrcUndefElts[SrcMaskIdx]) { - UndefElts[i] = true; + UndefElts.setBit(i); continue; } auto &SrcEltBits = RawBitsOps[SrcOpIdx]; APInt &Bits = SrcEltBits[SrcMaskIdx]; if (!Bits) { - ZeroElts[i] = true; + ZeroElts.setBit(i); continue; } - ConstantElts[i] = true; + ConstantElts.setBit(i); ConstantBitData[i] = Bits; } - assert((UndefElts | ZeroElts | ConstantElts).count() == NumMaskElts); + assert((UndefElts | ZeroElts | ConstantElts).isAllOnesValue()); // Create the constant data. MVT MaskSVT; @@ -26830,6 +27567,7 @@ static bool combineX86ShufflesConstants(const SmallVectorImpl &Ops, static bool combineX86ShufflesRecursively(ArrayRef SrcOps, int SrcOpIndex, SDValue Root, ArrayRef RootMask, + ArrayRef SrcNodes, int Depth, bool HasVariableMask, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, @@ -26853,13 +27591,17 @@ static bool combineX86ShufflesRecursively(ArrayRef SrcOps, "Can only combine shuffles of the same vector register size."); // Extract target shuffle mask and resolve sentinels and inputs. - SDValue Input0, Input1; - SmallVector OpMask; - if (!resolveTargetShuffleInputs(Op, Input0, Input1, OpMask)) + SmallVector OpMask; + SmallVector OpInputs; + if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask)) return false; + assert(OpInputs.size() <= 2 && "Too many shuffle inputs"); + SDValue Input0 = (OpInputs.size() > 0 ? OpInputs[0] : SDValue()); + SDValue Input1 = (OpInputs.size() > 1 ? OpInputs[1] : SDValue()); + // Add the inputs to the Ops list, avoiding duplicates. - SmallVector Ops(SrcOps.begin(), SrcOps.end()); + SmallVector Ops(SrcOps.begin(), SrcOps.end()); int InputIdx0 = -1, InputIdx1 = -1; for (int i = 0, e = Ops.size(); i < e; ++i) { @@ -26892,8 +27634,7 @@ static bool combineX86ShufflesRecursively(ArrayRef SrcOps, (RootRatio == 1) != (OpRatio == 1)) && "Must not have a ratio for both incoming and op masks!"); - SmallVector Mask; - Mask.reserve(MaskWidth); + SmallVector Mask((unsigned)MaskWidth, SM_SentinelUndef); // Merge this shuffle operation's mask into our accumulated mask. Note that // this shuffle's mask will be the first applied to the input, followed by the @@ -26903,7 +27644,7 @@ static bool combineX86ShufflesRecursively(ArrayRef SrcOps, int RootIdx = i / RootRatio; if (RootMask[RootIdx] < 0) { // This is a zero or undef lane, we're done. - Mask.push_back(RootMask[RootIdx]); + Mask[i] = RootMask[RootIdx]; continue; } @@ -26913,7 +27654,7 @@ static bool combineX86ShufflesRecursively(ArrayRef SrcOps, // than the SrcOp we're currently inserting. 
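// In the combined mask each source op owns a window of MaskWidth indices:
// op k covers [k * MaskWidth, (k + 1) * MaskWidth). The check below leaves a
// root index untouched when it refers to an op other than the one currently
// being merged. The window test, as a sketch (illustrative helper only):
static bool indexInOpWindow(int Idx, int OpIdx, int MaskWidth) {
  return OpIdx * MaskWidth <= Idx && Idx < (OpIdx + 1) * MaskWidth;
}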
if ((RootMaskedIdx < (SrcOpIndex * MaskWidth)) || (((SrcOpIndex + 1) * MaskWidth) <= RootMaskedIdx)) { - Mask.push_back(RootMaskedIdx); + Mask[i] = RootMaskedIdx; continue; } @@ -26923,7 +27664,7 @@ static bool combineX86ShufflesRecursively(ArrayRef SrcOps, if (OpMask[OpIdx] < 0) { // The incoming lanes are zero or undef, it doesn't matter which ones we // are using. - Mask.push_back(OpMask[OpIdx]); + Mask[i] = OpMask[OpIdx]; continue; } @@ -26939,7 +27680,7 @@ static bool combineX86ShufflesRecursively(ArrayRef SrcOps, OpMaskedIdx += InputIdx1 * MaskWidth; } - Mask.push_back(OpMaskedIdx); + Mask[i] = OpMaskedIdx; } // Handle the all undef/zero cases early. @@ -26957,28 +27698,25 @@ static bool combineX86ShufflesRecursively(ArrayRef SrcOps, } // Remove unused shuffle source ops. - SmallVector UsedOps; - for (int i = 0, e = Ops.size(); i < e; ++i) { - int lo = UsedOps.size() * MaskWidth; - int hi = lo + MaskWidth; - if (any_of(Mask, [lo, hi](int i) { return (lo <= i) && (i < hi); })) { - UsedOps.push_back(Ops[i]); - continue; - } - for (int &M : Mask) - if (lo <= M) - M -= MaskWidth; - } - assert(!UsedOps.empty() && "Shuffle with no inputs detected"); - Ops = UsedOps; + resolveTargetShuffleInputsAndMask(Ops, Mask); + assert(!Ops.empty() && "Shuffle with no inputs detected"); HasVariableMask |= isTargetShuffleVariableMask(Op.getOpcode()); - // See if we can recurse into each shuffle source op (if it's a target shuffle). + // Update the list of shuffle nodes that have been combined so far. + SmallVector CombinedNodes(SrcNodes.begin(), + SrcNodes.end()); + CombinedNodes.push_back(Op.getNode()); + + // See if we can recurse into each shuffle source op (if it's a target + // shuffle). The source op should only be combined if it either has a + // single use (i.e. current Op) or all its users have already been combined. for (int i = 0, e = Ops.size(); i < e; ++i) - if (Ops[i].getNode()->hasOneUse() || Op->isOnlyUserOf(Ops[i].getNode())) - if (combineX86ShufflesRecursively(Ops, i, Root, Mask, Depth + 1, - HasVariableMask, DAG, DCI, Subtarget)) + if (Ops[i].getNode()->hasOneUse() || + SDNode::areOnlyUsersOf(CombinedNodes, Ops[i].getNode())) + if (combineX86ShufflesRecursively(Ops, i, Root, Mask, CombinedNodes, + Depth + 1, HasVariableMask, DAG, DCI, + Subtarget)) return true; // Attempt to constant fold all of the constant source ops. @@ -26995,7 +27733,7 @@ static bool combineX86ShufflesRecursively(ArrayRef SrcOps, // elements, and shrink them to the half-width mask. It does this in a loop // so it will reduce the size of the mask to the minimal width mask which // performs an equivalent shuffle. - SmallVector WidenedMask; + SmallVector WidenedMask; while (Mask.size() > 1 && canWidenShuffleElements(Mask, WidenedMask)) { Mask = std::move(WidenedMask); } @@ -27061,8 +27799,7 @@ static SmallVector getPSHUFShuffleMask(SDValue N) { /// altering anything. 
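// Chained dword shuffles compose through their immediates:
// pshufd(pshufd(x, ImmA), ImmB) == pshufd(x, ImmC), where each 2-bit field
// satisfies C[i] = A[B[i]]. That is why a redundant PSHUFD chain can be
// folded into a single node. A standalone sketch of that composition
// (illustrative only; the routine below works on mask arrays instead):
static unsigned composePSHUFDImm(unsigned ImmA, unsigned ImmB) {
  unsigned ImmC = 0;
  for (unsigned i = 0; i != 4; ++i) {
    unsigned B = (ImmB >> (2 * i)) & 3; // lane the outer shuffle reads
    unsigned A = (ImmA >> (2 * B)) & 3; // element the inner shuffle put there
    ImmC |= A << (2 * i);
  }
  return ImmC;
}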
static SDValue combineRedundantDWordShuffle(SDValue N, MutableArrayRef Mask, - SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI) { + SelectionDAG &DAG) { assert(N.getOpcode() == X86ISD::PSHUFD && "Called with something other than an x86 128-bit half shuffle!"); SDLoc DL(N); @@ -27264,7 +28001,8 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, MVT VT = N.getSimpleValueType(); SmallVector Mask; - switch (N.getOpcode()) { + unsigned Opcode = N.getOpcode(); + switch (Opcode) { case X86ISD::PSHUFD: case X86ISD::PSHUFLW: case X86ISD::PSHUFHW: @@ -27339,6 +28077,31 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, return SDValue(); } + case X86ISD::MOVSD: + case X86ISD::MOVSS: { + bool isFloat = VT.isFloatingPoint(); + SDValue V0 = peekThroughBitcasts(N->getOperand(0)); + SDValue V1 = peekThroughBitcasts(N->getOperand(1)); + bool isFloat0 = V0.getSimpleValueType().isFloatingPoint(); + bool isFloat1 = V1.getSimpleValueType().isFloatingPoint(); + bool isZero0 = ISD::isBuildVectorAllZeros(V0.getNode()); + bool isZero1 = ISD::isBuildVectorAllZeros(V1.getNode()); + assert(!(isZero0 && isZero1) && "Zeroable shuffle detected."); + + // We often lower to MOVSD/MOVSS from integer as well as native float + // types; remove unnecessary domain-crossing bitcasts if we can to make it + // easier to combine shuffles later on. We've already accounted for the + // domain switching cost when we decided to lower with it. + if ((isFloat != isFloat0 || isZero0) && (isFloat != isFloat1 || isZero1)) { + MVT NewVT = isFloat ? (X86ISD::MOVSD == Opcode ? MVT::v2i64 : MVT::v4i32) + : (X86ISD::MOVSD == Opcode ? MVT::v2f64 : MVT::v4f32); + V0 = DAG.getBitcast(NewVT, V0); + V1 = DAG.getBitcast(NewVT, V1); + return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, NewVT, V0, V1)); + } + + return SDValue(); + } case X86ISD::INSERTPS: { assert(VT == MVT::v4f32 && "INSERTPS ValueType must be MVT::v4f32"); SDValue Op0 = N.getOperand(0); @@ -27499,7 +28262,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, break; case X86ISD::PSHUFD: - if (SDValue NewN = combineRedundantDWordShuffle(N, Mask, DAG, DCI)) + if (SDValue NewN = combineRedundantDWordShuffle(N, Mask, DAG)) return NewN; break; @@ -27508,29 +28271,32 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, return SDValue(); } -/// \brief Try to combine a shuffle into a target-specific add-sub node. +/// Returns true iff the shuffle node \p N can be replaced with ADDSUB +/// operation. If true is returned then the operands of ADDSUB operation +/// are written to the parameters \p Opnd0 and \p Opnd1. /// -/// We combine this directly on the abstract vector shuffle nodes so it is -/// easier to generically match. We also insert dummy vector shuffle nodes for -/// the operands which explicitly discard the lanes which are unused by this -/// operation to try to flow through the rest of the combiner the fact that -/// they're unused. -static SDValue combineShuffleToAddSub(SDNode *N, const X86Subtarget &Subtarget, - SelectionDAG &DAG) { - SDLoc DL(N); +/// We combine shuffle to ADDSUB directly on the abstract vector shuffle nodes +/// so it is easier to generically match. We also insert dummy vector shuffle +/// nodes for the operands which explicitly discard the lanes which are unused +/// by this operation to try to flow through the rest of the combiner +/// the fact that they're unused. 
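// X86ISD::ADDSUB subtracts in even lanes and adds in odd lanes, which is why
// the masks matched below ({0,3}, {0,5,2,7}, ...) blend FSUB results into
// even positions and FADD results into odd ones. Scalar reference of ADDSUB,
// and of the FMADDSUB form produced later when a multiply feeds it
// (illustrative sketches, not part of this patch):
static void addsubRef(const float *A, const float *B, float *R, int N) {
  for (int i = 0; i != N; ++i)
    R[i] = (i & 1) ? A[i] + B[i] : A[i] - B[i];
}
static void fmaddsubRef(const float *X, const float *Y, const float *Z,
                        float *R, int N) {
  for (int i = 0; i != N; ++i)
    R[i] = (i & 1) ? X[i] * Y[i] + Z[i] : X[i] * Y[i] - Z[i];
}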
+static bool isAddSub(SDNode *N, const X86Subtarget &Subtarget, + SDValue &Opnd0, SDValue &Opnd1) { + EVT VT = N->getValueType(0); if ((!Subtarget.hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) && - (!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64))) - return SDValue(); + (!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)) && + (!Subtarget.hasAVX512() || (VT != MVT::v16f32 && VT != MVT::v8f64))) + return false; // We only handle target-independent shuffles. // FIXME: It would be easy and harmless to use the target shuffle mask // extraction tool to support more. if (N->getOpcode() != ISD::VECTOR_SHUFFLE) - return SDValue(); + return false; ArrayRef OrigMask = cast(N)->getMask(); - SmallVector Mask(OrigMask.begin(), OrigMask.end()); + SmallVector Mask(OrigMask.begin(), OrigMask.end()); SDValue V1 = N->getOperand(0); SDValue V2 = N->getOperand(1); @@ -27541,27 +28307,57 @@ static SDValue combineShuffleToAddSub(SDNode *N, const X86Subtarget &Subtarget, ShuffleVectorSDNode::commuteMask(Mask); std::swap(V1, V2); } else if (V1.getOpcode() != ISD::FSUB || V2.getOpcode() != ISD::FADD) - return SDValue(); + return false; // If there are other uses of these operations we can't fold them. if (!V1->hasOneUse() || !V2->hasOneUse()) - return SDValue(); + return false; // Ensure that both operations have the same operands. Note that we can // commute the FADD operands. SDValue LHS = V1->getOperand(0), RHS = V1->getOperand(1); if ((V2->getOperand(0) != LHS || V2->getOperand(1) != RHS) && (V2->getOperand(0) != RHS || V2->getOperand(1) != LHS)) - return SDValue(); + return false; // We're looking for blends between FADD and FSUB nodes. We insist on these // nodes being lined up in a specific expected pattern. if (!(isShuffleEquivalent(V1, V2, Mask, {0, 3}) || isShuffleEquivalent(V1, V2, Mask, {0, 5, 2, 7}) || - isShuffleEquivalent(V1, V2, Mask, {0, 9, 2, 11, 4, 13, 6, 15}))) + isShuffleEquivalent(V1, V2, Mask, {0, 9, 2, 11, 4, 13, 6, 15}) || + isShuffleEquivalent(V1, V2, Mask, {0, 17, 2, 19, 4, 21, 6, 23, + 8, 25, 10, 27, 12, 29, 14, 31}))) + return false; + + Opnd0 = LHS; + Opnd1 = RHS; + return true; +} + +/// \brief Try to combine a shuffle into a target-specific add-sub or +/// mul-add-sub node. +static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { + SDValue Opnd0, Opnd1; + if (!isAddSub(N, Subtarget, Opnd0, Opnd1)) return SDValue(); - return DAG.getNode(X86ISD::ADDSUB, DL, VT, LHS, RHS); + EVT VT = N->getValueType(0); + SDLoc DL(N); + + // Try to generate X86ISD::FMADDSUB node here. + SDValue Opnd2; + if (isFMAddSub(Subtarget, DAG, Opnd0, Opnd1, Opnd2)) + return DAG.getNode(X86ISD::FMADDSUB, DL, VT, Opnd0, Opnd1, Opnd2); + + // Do not generate X86ISD::ADDSUB node for 512-bit types even though + // the ADDSUB idiom has been successfully recognized. There are no known + // X86 targets with 512-bit ADDSUB instructions! + if (VT.is512BitVector()) + return SDValue(); + + return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1); } // We are looking for a shuffle where both sources are concatenated with undef @@ -27614,16 +28410,11 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { SDLoc dl(N); EVT VT = N->getValueType(0); - - // Don't create instructions with illegal types after legalize types has run. 
const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(VT.getVectorElementType())) - return SDValue(); - // If we have legalized the vector types, look for blends of FADD and FSUB // nodes that we can fuse into an ADDSUB node. if (TLI.isTypeLegal(VT)) - if (SDValue AddSub = combineShuffleToAddSub(N, Subtarget, DAG)) + if (SDValue AddSub = combineShuffleToAddSubOrFMAddSub(N, Subtarget, DAG)) return AddSub; // During Type Legalization, when promoting illegal vector types, @@ -27690,11 +28481,18 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, // load4, <0, 1, 2, 3> into a 128-bit load if the load addresses are // consecutive, non-overlapping, and in the right order. SmallVector Elts; - for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) - Elts.push_back(getShuffleScalarElt(N, i, DAG, 0)); + for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { + if (SDValue Elt = getShuffleScalarElt(N, i, DAG, 0)) { + Elts.push_back(Elt); + continue; + } + Elts.clear(); + break; + } - if (SDValue LD = EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true)) - return LD; + if (Elts.size() == VT.getVectorNumElements()) + if (SDValue LD = EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true)) + return LD; // For AVX2, we sometimes want to combine // (vector_shuffle (concat_vectors t1, undef) @@ -27717,7 +28515,7 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, // a particular chain. SmallVector NonceMask; // Just a placeholder. NonceMask.push_back(0); - if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, + if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, {}, /*Depth*/ 1, /*HasVarMask*/ false, DAG, DCI, Subtarget)) return SDValue(); // This routine will use CombineTo to replace N. @@ -27744,18 +28542,13 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, EVT OriginalVT = InVec.getValueType(); - if (InVec.getOpcode() == ISD::BITCAST) { - // Don't duplicate a load with other uses. - if (!InVec.hasOneUse()) - return SDValue(); - EVT BCVT = InVec.getOperand(0).getValueType(); - if (!BCVT.isVector() || - BCVT.getVectorNumElements() != OriginalVT.getVectorNumElements()) - return SDValue(); - InVec = InVec.getOperand(0); - } + // Peek through bitcasts, don't duplicate a load with other uses. + InVec = peekThroughOneUseBitcasts(InVec); EVT CurrentVT = InVec.getValueType(); + if (!CurrentVT.isVector() || + CurrentVT.getVectorNumElements() != OriginalVT.getVectorNumElements()) + return SDValue(); if (!isTargetShuffle(InVec.getOpcode())) return SDValue(); @@ -27834,14 +28627,14 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + EVT SrcVT = N0.getValueType(); // Detect bitcasts between i32 to x86mmx low word. Since MMX types are // special and don't usually play with other vector types, it's better to // handle them early to be sure we emit efficient code by avoiding // store-load conversions. 
   if (VT == MVT::x86mmx && N0.getOpcode() == ISD::BUILD_VECTOR &&
-      N0.getValueType() == MVT::v2i32 &&
-      isNullConstant(N0.getOperand(1))) {
+      SrcVT == MVT::v2i32 && isNullConstant(N0.getOperand(1))) {
     SDValue N00 = N0->getOperand(0);
     if (N00.getValueType() == MVT::i32)
       return DAG.getNode(X86ISD::MMX_MOVW2D, SDLoc(N00), VT, N00);
@@ -28017,6 +28810,83 @@ static SDValue createPSADBW(SelectionDAG &DAG, const SDValue &Zext0,
   return DAG.getNode(X86ISD::PSADBW, DL, SadVT, SadOp0, SadOp1);
 }
 
+// Attempt to replace an all_of/any_of style horizontal reduction with a MOVMSK.
+static SDValue combineHorizontalPredicateResult(SDNode *Extract,
+                                                SelectionDAG &DAG,
+                                                const X86Subtarget &Subtarget) {
+  // Bail without SSE2 or with AVX512VL (which uses predicate registers).
+  if (!Subtarget.hasSSE2() || Subtarget.hasVLX())
+    return SDValue();
+
+  EVT ExtractVT = Extract->getValueType(0);
+  unsigned BitWidth = ExtractVT.getSizeInBits();
+  if (ExtractVT != MVT::i64 && ExtractVT != MVT::i32 && ExtractVT != MVT::i16 &&
+      ExtractVT != MVT::i8)
+    return SDValue();
+
+  // Check for OR(any_of) and AND(all_of) horizontal reduction patterns.
+  for (ISD::NodeType Op : {ISD::OR, ISD::AND}) {
+    SDValue Match = matchBinOpReduction(Extract, Op);
+    if (!Match)
+      continue;
+
+    // EXTRACT_VECTOR_ELT can require implicit extension of the vector element
+    // which we can't support here for now.
+    if (Match.getScalarValueSizeInBits() != BitWidth)
+      continue;
+
+    // We require AVX2 for PMOVMSKB for v16i16/v32i8;
+    unsigned MatchSizeInBits = Match.getValueSizeInBits();
+    if (!(MatchSizeInBits == 128 ||
+          (MatchSizeInBits == 256 &&
+           ((Subtarget.hasAVX() && BitWidth >= 32) || Subtarget.hasAVX2()))))
+      return SDValue();
+
+    // Don't bother performing this for 2-element vectors.
+    if (Match.getValueType().getVectorNumElements() <= 2)
+      return SDValue();
+
+    // Check that we are extracting a reduction of all sign bits.
+    if (DAG.ComputeNumSignBits(Match) != BitWidth)
+      return SDValue();
+
+    // For 32/64 bit comparisons use MOVMSKPS/MOVMSKPD, else PMOVMSKB.
+    MVT MaskVT;
+    if (64 == BitWidth || 32 == BitWidth)
+      MaskVT = MVT::getVectorVT(MVT::getFloatingPointVT(BitWidth),
+                                MatchSizeInBits / BitWidth);
+    else
+      MaskVT = MVT::getVectorVT(MVT::i8, MatchSizeInBits / 8);
+
+    APInt CompareBits;
+    ISD::CondCode CondCode;
+    if (Op == ISD::OR) {
+      // any_of -> MOVMSK != 0
+      CompareBits = APInt::getNullValue(32);
+      CondCode = ISD::CondCode::SETNE;
+    } else {
+      // all_of -> MOVMSK == ((1 << NumElts) - 1)
+      CompareBits = APInt::getLowBitsSet(32, MaskVT.getVectorNumElements());
+      CondCode = ISD::CondCode::SETEQ;
+    }
+
+    // Perform the select as i32/i64 and then truncate to avoid partial register
+    // stalls.
+    unsigned ResWidth = std::max(BitWidth, 32u);
+    EVT ResVT = EVT::getIntegerVT(*DAG.getContext(), ResWidth);
+    SDLoc DL(Extract);
+    SDValue Zero = DAG.getConstant(0, DL, ResVT);
+    SDValue Ones = DAG.getAllOnesConstant(DL, ResVT);
+    SDValue Res = DAG.getBitcast(MaskVT, Match);
+    Res = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Res);
+    Res = DAG.getSelectCC(DL, Res, DAG.getConstant(CompareBits, DL, MVT::i32),
+                          Ones, Zero, CondCode);
+    return DAG.getSExtOrTrunc(Res, DL, ExtractVT);
+  }
+
+  return SDValue();
+}
+
 static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
                                       const X86Subtarget &Subtarget) {
   // PSADBW is only supported on SSE2 and up.
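The combineHorizontalPredicateResult hunk above turns an OR (any_of) or AND (all_of) reduction of an all-sign-bits comparison into a single MOVMSK plus one scalar compare. The intrinsic-level equivalent for the 16 x i8 case shows the two bit tests it emits; a minimal sketch assuming an SSE2 target, with hypothetical helper names:

    #include <immintrin.h>

    // any_of: at least one comparison lane is all-ones, so at least one
    // MOVMSK bit is set and the mask compares not-equal to zero.
    static bool anyOfRef(__m128i Cmp) {
      return _mm_movemask_epi8(Cmp) != 0;
    }

    // all_of: every lane is all-ones, so all 16 mask bits are set and the
    // mask equals (1 << NumElts) - 1.
    static bool allOfRef(__m128i Cmp) {
      return _mm_movemask_epi8(Cmp) == 0xFFFF;
    }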
@@ -28084,6 +28954,87 @@ static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG, Extract->getOperand(1)); } +// Attempt to peek through a target shuffle and extract the scalar from the +// source. +static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget &Subtarget) { + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + SDValue Src = N->getOperand(0); + SDValue Idx = N->getOperand(1); + + EVT SrcVT = Src.getValueType(); + EVT SrcSVT = SrcVT.getVectorElementType(); + EVT VT = N->getValueType(0); + + // Don't attempt this for boolean mask vectors or unknown extraction indices. + if (SrcSVT == MVT::i1 || !isa(Idx)) + return SDValue(); + + // Resolve the target shuffle inputs and mask. + SmallVector Mask; + SmallVector Ops; + if (!resolveTargetShuffleInputs(peekThroughBitcasts(Src), Ops, Mask)) + return SDValue(); + + // At the moment we can only narrow a shuffle mask to handle extractions + // of smaller scalars. + // TODO - investigate support for wider shuffle masks with known upper + // undef/zero elements for implicit zero-extension. + unsigned NumMaskElts = Mask.size(); + if ((SrcVT.getVectorNumElements() % NumMaskElts) != 0) + return SDValue(); + + int Scale = SrcVT.getVectorNumElements() / NumMaskElts; + if (Scale != 1) { + SmallVector ScaledMask; + scaleShuffleMask(Scale, Mask, ScaledMask); + Mask = ScaledMask; + } + + int SrcIdx = Mask[N->getConstantOperandVal(1)]; + SDLoc dl(N); + + // If the shuffle source element is undef/zero then we can just accept it. + if (SrcIdx == SM_SentinelUndef) + return DAG.getUNDEF(VT); + + if (SrcIdx == SM_SentinelZero) + return VT.isFloatingPoint() ? DAG.getConstantFP(0.0, dl, VT) + : DAG.getConstant(0, dl, VT); + + SDValue SrcOp = Ops[SrcIdx / Mask.size()]; + SrcOp = DAG.getBitcast(SrcVT, SrcOp); + SrcIdx = SrcIdx % Mask.size(); + + // We can only extract other elements from 128-bit vectors and in certain + // circumstances, depending on SSE-level. + // TODO: Investigate using extract_subvector for larger vectors. + // TODO: Investigate float/double extraction if it will be just stored. + if ((SrcVT == MVT::v4i32 || SrcVT == MVT::v2i64) && + ((SrcIdx == 0 && Subtarget.hasSSE2()) || Subtarget.hasSSE41())) { + assert(SrcSVT == VT && "Unexpected extraction type"); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcSVT, SrcOp, + DAG.getIntPtrConstant(SrcIdx, dl)); + } + + if ((SrcVT == MVT::v8i16 && Subtarget.hasSSE2()) || + (SrcVT == MVT::v16i8 && Subtarget.hasSSE41())) { + assert(VT.getSizeInBits() >= SrcSVT.getSizeInBits() && + "Unexpected extraction type"); + unsigned OpCode = (SrcVT == MVT::v8i16 ? X86ISD::PEXTRW : X86ISD::PEXTRB); + SDValue ExtOp = DAG.getNode(OpCode, dl, MVT::i32, SrcOp, + DAG.getIntPtrConstant(SrcIdx, dl)); + SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, ExtOp, + DAG.getValueType(SrcSVT)); + return DAG.getZExtOrTrunc(Assert, dl, VT); + } + + return SDValue(); +} + /// Detect vector gather/scatter index generation and convert it from being a /// bunch of shuffles and extracts into a somewhat faster sequence. 
/// For i686, the best sequence is apparently storing the value and loading @@ -28094,14 +29045,29 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, if (SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI)) return NewOp; + if (SDValue NewOp = combineExtractWithShuffle(N, DAG, DCI, Subtarget)) + return NewOp; + SDValue InputVector = N->getOperand(0); + SDValue EltIdx = N->getOperand(1); + + EVT SrcVT = InputVector.getValueType(); + EVT VT = N->getValueType(0); SDLoc dl(InputVector); + + // Detect mmx extraction of all bits as a i64. It works better as a bitcast. + if (InputVector.getOpcode() == ISD::BITCAST && InputVector.hasOneUse() && + VT == MVT::i64 && SrcVT == MVT::v1i64 && isNullConstant(EltIdx)) { + SDValue MMXSrc = InputVector.getOperand(0); + + // The bitcast source is a direct mmx result. + if (MMXSrc.getValueType() == MVT::x86mmx) + return DAG.getBitcast(VT, InputVector); + } + // Detect mmx to i32 conversion through a v2i32 elt extract. if (InputVector.getOpcode() == ISD::BITCAST && InputVector.hasOneUse() && - N->getValueType(0) == MVT::i32 && - InputVector.getValueType() == MVT::v2i32 && - isa(N->getOperand(1)) && - N->getConstantOperandVal(1) == 0) { + VT == MVT::i32 && SrcVT == MVT::v2i32 && isNullConstant(EltIdx)) { SDValue MMXSrc = InputVector.getOperand(0); // The bitcast source is a direct mmx result. @@ -28109,15 +29075,11 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, return DAG.getNode(X86ISD::MMX_MOVD2W, dl, MVT::i32, MMXSrc); } - EVT VT = N->getValueType(0); - - if (VT == MVT::i1 && isa(N->getOperand(1)) && - InputVector.getOpcode() == ISD::BITCAST && + if (VT == MVT::i1 && InputVector.getOpcode() == ISD::BITCAST && + isa(EltIdx) && isa(InputVector.getOperand(0))) { - uint64_t ExtractedElt = - cast(N->getOperand(1))->getZExtValue(); - uint64_t InputValue = - cast(InputVector.getOperand(0))->getZExtValue(); + uint64_t ExtractedElt = N->getConstantOperandVal(1); + uint64_t InputValue = InputVector.getConstantOperandVal(0); uint64_t Res = (InputValue >> ExtractedElt) & 1; return DAG.getConstant(Res, dl, MVT::i1); } @@ -28128,9 +29090,13 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, if (SDValue SAD = combineBasicSADPattern(N, DAG, Subtarget)) return SAD; + // Attempt to replace an all_of/any_of horizontal reduction with a MOVMSK. + if (SDValue Cmp = combineHorizontalPredicateResult(N, DAG, Subtarget)) + return Cmp; + // Only operate on vectors of 4 elements, where the alternative shuffling // gets to be more expensive. - if (InputVector.getValueType() != MVT::v4i32) + if (SrcVT != MVT::v4i32) return SDValue(); // Check whether every use of InputVector is an EXTRACT_VECTOR_ELT with a @@ -28158,9 +29124,7 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, return SDValue(); // Record which element was extracted. - ExtractedElements |= - 1 << cast(Extract->getOperand(1))->getZExtValue(); - + ExtractedElements |= 1 << Extract->getConstantOperandVal(1); Uses.push_back(Extract); } @@ -28193,11 +29157,11 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, DAG.getNode(ISD::SRA, dl, MVT::i64, TopHalf, ShAmt)); } else { // Store the value to a temporary stack slot. 
- SDValue StackPtr = DAG.CreateStackTemporary(InputVector.getValueType()); + SDValue StackPtr = DAG.CreateStackTemporary(SrcVT); SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr, MachinePointerInfo()); - EVT ElementType = InputVector.getValueType().getVectorElementType(); + EVT ElementType = SrcVT.getVectorElementType(); unsigned EltSize = ElementType.getSizeInBits() / 8; // Replace each use (extract) with a load of the appropriate element. @@ -28220,8 +29184,7 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, UE = Uses.end(); UI != UE; ++UI) { SDNode *Extract = *UI; - SDValue Idx = Extract->getOperand(1); - uint64_t IdxVal = cast(Idx)->getZExtValue(); + uint64_t IdxVal = Extract->getConstantOperandVal(1); DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), Vals[IdxVal]); } @@ -28229,10 +29192,22 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, return SDValue(); } -/// If a vector select has an operand that is -1 or 0, simplify the select to a -/// bitwise logic operation. -static SDValue combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG, - const X86Subtarget &Subtarget) { +// TODO - merge with combineExtractVectorElt once it can handle the implicit +// zero-extension of X86ISD::PINSRW/X86ISD::PINSRB in: +// XFormVExtractWithShuffleIntoLoad, combineHorizontalPredicateResult and +// combineBasicSADPattern. +static SDValue combineExtractVectorElt_SSE(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget &Subtarget) { + return combineExtractWithShuffle(N, DAG, DCI, Subtarget); +} + +/// If a vector select has an operand that is -1 or 0, try to simplify the +/// select to a bitwise logic operation. +static SDValue +combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget &Subtarget) { SDValue Cond = N->getOperand(0); SDValue LHS = N->getOperand(1); SDValue RHS = N->getOperand(2); @@ -28244,17 +29219,19 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG, if (N->getOpcode() != ISD::VSELECT) return SDValue(); + assert(CondVT.isVector() && "Vector select expects a vector selector!"); + bool FValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode()); - // Check if the first operand is all zeros.This situation only - // applies to avx512. - if (FValIsAllZeros && Subtarget.hasAVX512() && Cond.hasOneUse()) { - //Invert the cond to not(cond) : xor(op,allones)=not(op) - SDValue CondNew = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond, - DAG.getConstant(1, DL, Cond.getValueType())); - //Vselect cond, op1, op2 = Vselect not(cond), op2, op1 - return DAG.getNode(ISD::VSELECT, DL, VT, CondNew, RHS, LHS); + // Check if the first operand is all zeros and Cond type is vXi1. + // This situation only applies to avx512. + if (FValIsAllZeros && Subtarget.hasAVX512() && Cond.hasOneUse() && + CondVT.getVectorElementType() == MVT::i1) { + // Invert the cond to not(cond) : xor(op,allones)=not(op) + SDValue CondNew = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond, + DAG.getAllOnesConstant(DL, CondVT)); + // Vselect cond, op1, op2 = Vselect not(cond), op2, op1 + return DAG.getNode(ISD::VSELECT, DL, VT, CondNew, RHS, LHS); } - assert(CondVT.isVector() && "Vector select expects a vector selector!"); // To use the condition operand as a bitwise mask, it must have elements that // are the same size as the select elements. 
Ie, the condition operand must @@ -28291,18 +29268,28 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG, } } - if (!TValIsAllOnes && !FValIsAllZeros) + // vselect Cond, 111..., 000... -> Cond + if (TValIsAllOnes && FValIsAllZeros) + return DAG.getBitcast(VT, Cond); + + if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(CondVT)) return SDValue(); - SDValue Ret; - if (TValIsAllOnes && FValIsAllZeros) - Ret = Cond; - else if (TValIsAllOnes) - Ret = DAG.getNode(ISD::OR, DL, CondVT, Cond, DAG.getBitcast(CondVT, RHS)); - else if (FValIsAllZeros) - Ret = DAG.getNode(ISD::AND, DL, CondVT, Cond, DAG.getBitcast(CondVT, LHS)); + // vselect Cond, 111..., X -> or Cond, X + if (TValIsAllOnes) { + SDValue CastRHS = DAG.getBitcast(CondVT, RHS); + SDValue Or = DAG.getNode(ISD::OR, DL, CondVT, Cond, CastRHS); + return DAG.getBitcast(VT, Or); + } + + // vselect Cond, X, 000... -> and Cond, X + if (FValIsAllZeros) { + SDValue CastLHS = DAG.getBitcast(CondVT, LHS); + SDValue And = DAG.getNode(ISD::AND, DL, CondVT, Cond, CastLHS); + return DAG.getBitcast(VT, And); + } - return DAG.getBitcast(VT, Ret); + return SDValue(); } static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) { @@ -28469,6 +29456,66 @@ static bool combineBitcastForMaskedOp(SDValue OrigOp, SelectionDAG &DAG, return BitcastAndCombineShuffle(Opcode, Op.getOperand(0), Op.getOperand(1), Op.getOperand(2)); } + case ISD::INSERT_SUBVECTOR: { + unsigned EltSize = EltVT.getSizeInBits(); + if (EltSize != 32 && EltSize != 64) + return false; + MVT OpEltVT = Op.getSimpleValueType().getVectorElementType(); + // Only change element size, not type. + if (EltVT.isInteger() != OpEltVT.isInteger()) + return false; + uint64_t Imm = cast(Op.getOperand(2))->getZExtValue(); + Imm = (Imm * OpEltVT.getSizeInBits()) / EltSize; + SDValue Op0 = DAG.getBitcast(VT, Op.getOperand(0)); + DCI.AddToWorklist(Op0.getNode()); + // Op1 needs to be bitcasted to a smaller vector with the same element type. + SDValue Op1 = Op.getOperand(1); + MVT Op1VT = MVT::getVectorVT(EltVT, + Op1.getSimpleValueType().getSizeInBits() / EltSize); + Op1 = DAG.getBitcast(Op1VT, Op1); + DCI.AddToWorklist(Op1.getNode()); + DCI.CombineTo(OrigOp.getNode(), + DAG.getNode(Opcode, DL, VT, Op0, Op1, + DAG.getIntPtrConstant(Imm, DL))); + return true; + } + case ISD::EXTRACT_SUBVECTOR: { + unsigned EltSize = EltVT.getSizeInBits(); + if (EltSize != 32 && EltSize != 64) + return false; + MVT OpEltVT = Op.getSimpleValueType().getVectorElementType(); + // Only change element size, not type. + if (EltVT.isInteger() != OpEltVT.isInteger()) + return false; + uint64_t Imm = cast(Op.getOperand(1))->getZExtValue(); + Imm = (Imm * OpEltVT.getSizeInBits()) / EltSize; + // Op0 needs to be bitcasted to a larger vector with the same element type. + SDValue Op0 = Op.getOperand(0); + MVT Op0VT = MVT::getVectorVT(EltVT, + Op0.getSimpleValueType().getSizeInBits() / EltSize); + Op0 = DAG.getBitcast(Op0VT, Op0); + DCI.AddToWorklist(Op0.getNode()); + DCI.CombineTo(OrigOp.getNode(), + DAG.getNode(Opcode, DL, VT, Op0, + DAG.getIntPtrConstant(Imm, DL))); + return true; + } + case X86ISD::SUBV_BROADCAST: { + unsigned EltSize = EltVT.getSizeInBits(); + if (EltSize != 32 && EltSize != 64) + return false; + // Only change element size, not type. 
+ if (VT.isInteger() != Op.getSimpleValueType().isInteger()) + return false; + SDValue Op0 = Op.getOperand(0); + MVT Op0VT = MVT::getVectorVT(EltVT, + Op0.getSimpleValueType().getSizeInBits() / EltSize); + Op0 = DAG.getBitcast(Op0VT, Op.getOperand(0)); + DCI.AddToWorklist(Op0.getNode()); + DCI.CombineTo(OrigOp.getNode(), + DAG.getNode(Opcode, DL, VT, Op0)); + return true; + } } return false; @@ -28747,13 +29794,13 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, } } - if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, Subtarget)) + if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, DCI, Subtarget)) return V; // If this is a *dynamic* select (non-constant condition) and we can match // this node with one of the variable blend instructions, restructure the - // condition so that the blends can use the high bit of each element and use - // SimplifyDemandedBits to simplify the condition operand. + // condition so that blends can use the high (sign) bit of each element and + // use SimplifyDemandedBits to simplify the condition operand. if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() && !DCI.isBeforeLegalize() && !ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) { @@ -28788,49 +29835,45 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, return SDValue(); assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size"); - APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1); - + APInt DemandedMask(APInt::getSignBit(BitWidth)); APInt KnownZero, KnownOne; TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(), DCI.isBeforeLegalizeOps()); if (TLO.ShrinkDemandedConstant(Cond, DemandedMask) || TLI.SimplifyDemandedBits(Cond, DemandedMask, KnownZero, KnownOne, TLO)) { - // If we changed the computation somewhere in the DAG, this change - // will affect all users of Cond. - // Make sure it is fine and update all the nodes so that we do not - // use the generic VSELECT anymore. Otherwise, we may perform - // wrong optimizations as we messed up with the actual expectation + // If we changed the computation somewhere in the DAG, this change will + // affect all users of Cond. Make sure it is fine and update all the nodes + // so that we do not use the generic VSELECT anymore. Otherwise, we may + // perform wrong optimizations as we messed with the actual expectation // for the vector boolean values. if (Cond != TLO.Old) { - // Check all uses of that condition operand to check whether it will be - // consumed by non-BLEND instructions, which may depend on all bits are - // set properly. - for (SDNode::use_iterator I = Cond->use_begin(), E = Cond->use_end(); - I != E; ++I) - if (I->getOpcode() != ISD::VSELECT) - // TODO: Add other opcodes eventually lowered into BLEND. + // Check all uses of the condition operand to check whether it will be + // consumed by non-BLEND instructions. Those may require that all bits + // are set properly. + for (SDNode *U : Cond->uses()) { + // TODO: Add other opcodes eventually lowered into BLEND. + if (U->getOpcode() != ISD::VSELECT) return SDValue(); + } - // Update all the users of the condition, before committing the change, - // so that the VSELECT optimizations that expect the correct vector - // boolean value will not be triggered. 
- for (SDNode::use_iterator I = Cond->use_begin(), E = Cond->use_end(); - I != E; ++I) - DAG.ReplaceAllUsesOfValueWith( - SDValue(*I, 0), - DAG.getNode(X86ISD::SHRUNKBLEND, SDLoc(*I), I->getValueType(0), - Cond, I->getOperand(1), I->getOperand(2))); + // Update all users of the condition before committing the change, so + // that the VSELECT optimizations that expect the correct vector boolean + // value will not be triggered. + for (SDNode *U : Cond->uses()) { + SDValue SB = DAG.getNode(X86ISD::SHRUNKBLEND, SDLoc(U), + U->getValueType(0), Cond, U->getOperand(1), + U->getOperand(2)); + DAG.ReplaceAllUsesOfValueWith(SDValue(U, 0), SB); + } DCI.CommitTargetLoweringOpt(TLO); return SDValue(); } - // At this point, only Cond is changed. Change the condition - // just for N to keep the opportunity to optimize all other - // users their own way. - DAG.ReplaceAllUsesOfValueWith( - SDValue(N, 0), - DAG.getNode(X86ISD::SHRUNKBLEND, SDLoc(N), N->getValueType(0), - TLO.New, N->getOperand(1), N->getOperand(2))); + // Only Cond (rather than other nodes in the computation chain) was + // changed. Change the condition just for N to keep the opportunity to + // optimize all other users their own way. + SDValue SB = DAG.getNode(X86ISD::SHRUNKBLEND, DL, VT, TLO.New, LHS, RHS); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), SB); return SDValue(); } } @@ -28838,7 +29881,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, // Look for vselects with LHS/RHS being bitcasted from an operation that // can be executed on another type. Push the bitcast to the inputs of // the operation. This exposes opportunities for using masking instructions. - if (N->getOpcode() == ISD::VSELECT && !DCI.isBeforeLegalizeOps() && + if (N->getOpcode() == ISD::VSELECT && DCI.isAfterLegalizeVectorOps() && CondVT.getVectorElementType() == MVT::i1) { if (combineBitcastForMaskedOp(LHS, DAG, DCI)) return SDValue(N, 0); @@ -28862,6 +29905,12 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC, (Cmp.getOpcode() == X86ISD::SUB && !Cmp->hasAnyUseOfValue(0)))) return SDValue(); + // Can't replace the cmp if it has more uses than the one we're looking at. + // FIXME: We would like to be able to handle this, but would need to make sure + // all uses were updated. + if (!Cmp.hasOneUse()) + return SDValue(); + // This only applies to variations of the common case: // (icmp slt x, 0) -> (icmp sle (add x, 1), 0) // (icmp sge x, 0) -> (icmp sgt (add x, 1), 0) @@ -29584,22 +30633,37 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG, } if (!NewMul) { - assert(MulAmt != 0 && MulAmt != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX) - && "Both cases that could cause potential overflows should have " - "already been handled."); - if (isPowerOf2_64(MulAmt - 1)) - // (mul x, 2^N + 1) => (add (shl x, N), x) - NewMul = DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0), - DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), - DAG.getConstant(Log2_64(MulAmt - 1), DL, - MVT::i8))); - - else if (isPowerOf2_64(MulAmt + 1)) - // (mul x, 2^N - 1) => (sub (shl x, N), x) - NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getNode(ISD::SHL, DL, VT, - N->getOperand(0), - DAG.getConstant(Log2_64(MulAmt + 1), - DL, MVT::i8)), N->getOperand(0)); + assert(MulAmt != 0 && + MulAmt != (VT == MVT::i64 ? 
UINT64_MAX : UINT32_MAX) && + "Both cases that could cause potential overflows should have " + "already been handled."); + int64_t SignMulAmt = C->getSExtValue(); + if ((SignMulAmt != INT64_MIN) && (SignMulAmt != INT64_MAX) && + (SignMulAmt != -INT64_MAX)) { + int NumSign = SignMulAmt > 0 ? 1 : -1; + bool IsPowerOf2_64PlusOne = isPowerOf2_64(NumSign * SignMulAmt - 1); + bool IsPowerOf2_64MinusOne = isPowerOf2_64(NumSign * SignMulAmt + 1); + if (IsPowerOf2_64PlusOne) { + // (mul x, 2^N + 1) => (add (shl x, N), x) + NewMul = DAG.getNode( + ISD::ADD, DL, VT, N->getOperand(0), + DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getConstant(Log2_64(NumSign * SignMulAmt - 1), DL, + MVT::i8))); + } else if (IsPowerOf2_64MinusOne) { + // (mul x, 2^N - 1) => (sub (shl x, N), x) + NewMul = DAG.getNode( + ISD::SUB, DL, VT, + DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getConstant(Log2_64(NumSign * SignMulAmt + 1), DL, + MVT::i8)), + N->getOperand(0)); + } + // To negate, subtract the number from zero + if ((IsPowerOf2_64PlusOne || IsPowerOf2_64MinusOne) && NumSign == -1) + NewMul = + DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), NewMul); + } } if (NewMul) @@ -29775,31 +30839,84 @@ static SDValue combineShift(SDNode* N, SelectionDAG &DAG, static SDValue combineVectorShift(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { - assert((X86ISD::VSHLI == N->getOpcode() || X86ISD::VSRLI == N->getOpcode()) && - "Unexpected opcode"); + unsigned Opcode = N->getOpcode(); + assert((X86ISD::VSHLI == Opcode || X86ISD::VSRAI == Opcode || + X86ISD::VSRLI == Opcode) && + "Unexpected shift opcode"); + bool LogicalShift = X86ISD::VSHLI == Opcode || X86ISD::VSRLI == Opcode; EVT VT = N->getValueType(0); + SDValue N0 = N->getOperand(0); unsigned NumBitsPerElt = VT.getScalarSizeInBits(); - - // This fails for mask register (vXi1) shifts. - if ((NumBitsPerElt % 8) != 0) - return SDValue(); + assert(VT == N0.getValueType() && (NumBitsPerElt % 8) == 0 && + "Unexpected value type"); // Out of range logical bit shifts are guaranteed to be zero. + // Out of range arithmetic bit shifts splat the sign bit. APInt ShiftVal = cast(N->getOperand(1))->getAPIntValue(); - if (ShiftVal.zextOrTrunc(8).uge(NumBitsPerElt)) + if (ShiftVal.zextOrTrunc(8).uge(NumBitsPerElt)) { + if (LogicalShift) + return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(N)); + else + ShiftVal = NumBitsPerElt - 1; + } + + // Shift N0 by zero -> N0. + if (!ShiftVal) + return N0; + + // Shift zero -> zero. + if (ISD::isBuildVectorAllZeros(N0.getNode())) return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(N)); // We can decode 'whole byte' logical bit shifts as shuffles. - if ((ShiftVal.getZExtValue() % 8) == 0) { + if (LogicalShift && (ShiftVal.getZExtValue() % 8) == 0) { SDValue Op(N, 0); SmallVector NonceMask; // Just a placeholder. NonceMask.push_back(0); - if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, + if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, {}, /*Depth*/ 1, /*HasVarMask*/ false, DAG, DCI, Subtarget)) return SDValue(); // This routine will use CombineTo to replace N. } + // Constant Folding. 
+ APInt UndefElts; + SmallVector EltBits; + if (N->isOnlyUserOf(N0.getNode()) && + getTargetConstantBitsFromNode(N0, NumBitsPerElt, UndefElts, EltBits)) { + assert(EltBits.size() == VT.getVectorNumElements() && + "Unexpected shift value type"); + unsigned ShiftImm = ShiftVal.getZExtValue(); + for (APInt &Elt : EltBits) { + if (X86ISD::VSHLI == Opcode) + Elt = Elt.shl(ShiftImm); + else if (X86ISD::VSRAI == Opcode) + Elt = Elt.ashr(ShiftImm); + else + Elt = Elt.lshr(ShiftImm); + } + return getConstVector(EltBits, UndefElts, VT.getSimpleVT(), DAG, SDLoc(N)); + } + + return SDValue(); +} + +static SDValue combineVectorInsert(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget &Subtarget) { + assert( + ((N->getOpcode() == X86ISD::PINSRB && N->getValueType(0) == MVT::v16i8) || + (N->getOpcode() == X86ISD::PINSRW && + N->getValueType(0) == MVT::v8i16)) && + "Unexpected vector insertion"); + + // Attempt to combine PINSRB/PINSRW patterns to a shuffle. + SDValue Op(N, 0); + SmallVector NonceMask; // Just a placeholder. + NonceMask.push_back(0); + combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, {}, + /*Depth*/ 1, /*HasVarMask*/ false, DAG, + DCI, Subtarget); return SDValue(); } @@ -29918,33 +31035,15 @@ static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) { if (VT != MVT::v2i64 && VT != MVT::v4i64 && VT != MVT::v8i64) return SDValue(); - // Canonicalize XOR to the left. - if (N1.getOpcode() == ISD::XOR) - std::swap(N0, N1); + if (N0.getOpcode() == ISD::XOR && + ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode())) + return DAG.getNode(X86ISD::ANDNP, DL, VT, N0.getOperand(0), N1); - if (N0.getOpcode() != ISD::XOR) - return SDValue(); + if (N1.getOpcode() == ISD::XOR && + ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode())) + return DAG.getNode(X86ISD::ANDNP, DL, VT, N1.getOperand(0), N0); - SDValue N00 = N0->getOperand(0); - SDValue N01 = N0->getOperand(1); - - N01 = peekThroughBitcasts(N01); - - // Either match a direct AllOnes for 128, 256, and 512-bit vectors, or an - // insert_subvector building a 256-bit AllOnes vector. - if (!ISD::isBuildVectorAllOnes(N01.getNode())) { - if (!VT.is256BitVector() || N01->getOpcode() != ISD::INSERT_SUBVECTOR) - return SDValue(); - - SDValue V1 = N01->getOperand(0); - SDValue V2 = N01->getOperand(1); - if (V1.getOpcode() != ISD::INSERT_SUBVECTOR || - !V1.getOperand(0).isUndef() || - !ISD::isBuildVectorAllOnes(V1.getOperand(1).getNode()) || - !ISD::isBuildVectorAllOnes(V2.getNode())) - return SDValue(); - } - return DAG.getNode(X86ISD::ANDNP, DL, VT, N00, N1); + return SDValue(); } // On AVX/AVX2 the type v8i1 is legalized to v8i16, which is an XMM sized @@ -30128,7 +31227,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG, SDValue Op(N, 0); SmallVector NonceMask; // Just a placeholder. NonceMask.push_back(0); - if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, + if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, {}, /*Depth*/ 1, /*HasVarMask*/ false, DAG, DCI, Subtarget)) return SDValue(); // This routine will use CombineTo to replace N. 
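The simplified combineANDXORWithAllOnesIntoANDNP above now just probes each AND operand for an all-ones XOR, since PANDN natively computes the negated-AND of its first operand. A scalar model of the identity being exploited (illustrative sketch):

    #include <cstdint>

    // X86ISD::ANDNP semantics: complement the first operand, AND the second.
    static uint64_t andnpRef(uint64_t A, uint64_t B) {
      return ~A & B;
    }
    // Hence and(xor(x, -1), y) and and(y, xor(x, -1)) both become andnp(x, y).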
@@ -30337,7 +31436,7 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG, return N->getOpcode() == X86ISD::SETCC && N->hasOneUse() && X86::CondCode(N->getConstantOperandVal(0)) == X86::COND_E && N->getOperand(1).getOpcode() == X86ISD::CMP && - N->getOperand(1).getConstantOperandVal(1) == 0 && + isNullConstant(N->getOperand(1).getOperand(1)) && N->getOperand(1).getValueType().bitsGE(MVT::i32); }; @@ -30640,6 +31739,74 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG, return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones); } +/// Check if truncation with saturation from type \p SrcVT to \p DstVT +/// is valid for the given \p Subtarget. +static bool isSATValidOnAVX512Subtarget(EVT SrcVT, EVT DstVT, + const X86Subtarget &Subtarget) { + if (!Subtarget.hasAVX512()) + return false; + + // FIXME: Scalar type may be supported if we move it to vector register. + if (!SrcVT.isVector() || !SrcVT.isSimple() || SrcVT.getSizeInBits() > 512) + return false; + + EVT SrcElVT = SrcVT.getScalarType(); + EVT DstElVT = DstVT.getScalarType(); + if (SrcElVT.getSizeInBits() < 16 || SrcElVT.getSizeInBits() > 64) + return false; + if (DstElVT.getSizeInBits() < 8 || DstElVT.getSizeInBits() > 32) + return false; + if (SrcVT.is512BitVector() || Subtarget.hasVLX()) + return SrcElVT.getSizeInBits() >= 32 || Subtarget.hasBWI(); + return false; +} + +/// Detect a pattern of truncation with saturation: +/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type). +/// Return the source value to be truncated or SDValue() if the pattern was not +/// matched. +static SDValue detectUSatPattern(SDValue In, EVT VT) { + if (In.getOpcode() != ISD::UMIN) + return SDValue(); + + // Saturation with truncation. We truncate from InVT to VT. + assert(In.getScalarValueSizeInBits() > VT.getScalarSizeInBits() && + "Unexpected types for truncate operation"); + + APInt C; + if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C)) { + // C should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according to + // the element size of the destination type. + return APIntOps::isMask(VT.getScalarSizeInBits(), C) ? In.getOperand(0) : + SDValue(); + } + return SDValue(); +} + +/// Detect a pattern of truncation with saturation: +/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type). +/// The types should allow using the VPMOVUS* instructions on AVX512. +/// Return the source value to be truncated or SDValue() if the pattern was not +/// matched. +static SDValue detectAVX512USatPattern(SDValue In, EVT VT, + const X86Subtarget &Subtarget) { + if (!isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget)) + return SDValue(); + return detectUSatPattern(In, VT); +} + +static SDValue +combineTruncateWithUSat(SDValue In, EVT VT, SDLoc &DL, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!TLI.isTypeLegal(In.getValueType()) || !TLI.isTypeLegal(VT)) + return SDValue(); + if (auto USatVal = detectUSatPattern(In, VT)) + if (isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget)) + return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal); + return SDValue(); +} + /// This function detects the AVG pattern between vectors of unsigned i8/i16, /// which is c = (a + b + 1) / 2, and replace this operation with the efficient /// X86ISD::AVG instruction.
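A scalar sketch of the AVG formula described in the comment above (illustrative only): widening before the add keeps the +1 rounding bias from overflowing the unsigned i8 element type.

#include <cassert>
#include <cstdint>

// Rounded unsigned average, c = (a + b + 1) / 2, as X86ISD::AVG computes it.
static uint8_t avg_u8(uint8_t A, uint8_t B) {
  return static_cast<uint8_t>((uint16_t(A) + uint16_t(B) + 1) >> 1);
}

int main() {
  assert(avg_u8(255, 255) == 255); // widening avoids wrap-around
  assert(avg_u8(0, 1) == 1);       // the +1 bias rounds halves up
  return 0;
}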
@@ -31032,7 +32199,7 @@ static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG, Mld->getBasePtr(), NewMask, WideSrc0, Mld->getMemoryVT(), Mld->getMemOperand(), ISD::NON_EXTLOAD); - SDValue NewVec = DAG.getNode(X86ISD::VSEXT, dl, VT, WideLd); + SDValue NewVec = getExtendInVec(X86ISD::VSEXT, dl, VT, WideLd, DAG); return DCI.CombineTo(N, NewVec, WideLd.getValue(1), true); } @@ -31206,6 +32373,12 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, St->getPointerInfo(), St->getAlignment(), St->getMemOperand()->getFlags()); + if (SDValue Val = + detectAVX512USatPattern(St->getValue(), St->getMemoryVT(), Subtarget)) + return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(), + dl, Val, St->getBasePtr(), + St->getMemoryVT(), St->getMemOperand(), DAG); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned NumElems = VT.getVectorNumElements(); assert(StVT != VT && "Cannot truncate to the same type"); @@ -31552,6 +32725,100 @@ static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG, return SDValue(); } +/// Attempt to pre-truncate inputs to arithmetic ops if it will simplify +/// the codegen. +/// e.g. TRUNC( BINOP( X, Y ) ) --> BINOP( TRUNC( X ), TRUNC( Y ) ) +static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG, + const X86Subtarget &Subtarget, + SDLoc &DL) { + assert(N->getOpcode() == ISD::TRUNCATE && "Wrong opcode"); + SDValue Src = N->getOperand(0); + unsigned Opcode = Src.getOpcode(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + EVT VT = N->getValueType(0); + EVT SrcVT = Src.getValueType(); + + auto IsRepeatedOpOrFreeTruncation = [VT](SDValue Op0, SDValue Op1) { + unsigned TruncSizeInBits = VT.getScalarSizeInBits(); + + // Repeated operand, so we are only trading one output truncation for + // one input truncation. + if (Op0 == Op1) + return true; + + // See if either operand has been extended from a smaller/equal size to + // the truncation size, allowing a truncation to combine with the extend. + unsigned Opcode0 = Op0.getOpcode(); + if ((Opcode0 == ISD::ANY_EXTEND || Opcode0 == ISD::SIGN_EXTEND || + Opcode0 == ISD::ZERO_EXTEND) && + Op0.getOperand(0).getScalarValueSizeInBits() <= TruncSizeInBits) + return true; + + unsigned Opcode1 = Op1.getOpcode(); + if ((Opcode1 == ISD::ANY_EXTEND || Opcode1 == ISD::SIGN_EXTEND || + Opcode1 == ISD::ZERO_EXTEND) && + Op1.getOperand(0).getScalarValueSizeInBits() <= TruncSizeInBits) + return true; + + // See if either operand is a single use constant which can be constant + // folded. + SDValue BC0 = peekThroughOneUseBitcasts(Op0); + SDValue BC1 = peekThroughOneUseBitcasts(Op1); + return ISD::isBuildVectorOfConstantSDNodes(BC0.getNode()) || + ISD::isBuildVectorOfConstantSDNodes(BC1.getNode()); + }; + + auto TruncateArithmetic = [&](SDValue N0, SDValue N1) { + SDValue Trunc0 = DAG.getNode(ISD::TRUNCATE, DL, VT, N0); + SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, VT, N1); + return DAG.getNode(Opcode, DL, VT, Trunc0, Trunc1); + }; + + // Don't combine if the operation has other uses. + if (!N->isOnlyUserOf(Src.getNode())) + return SDValue(); + + // Only support vector truncation for now. + // TODO: i64 scalar math would benefit as well. + if (!VT.isVector()) + return SDValue(); + + // In most cases it's only worth pre-truncating if we're only facing the cost + // of one truncation. + // i.e. if one of the inputs will constant fold or the input is repeated.
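+ // For example (illustrative): v8i16 trunc(v8i32 and(zext(v8i16 x), splat(C))) + // can become v8i16 and(x, trunc(splat(C))) - the constant folds at compile + // time and the zext/trunc pair on the other operand cancels out.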
+ switch (Opcode) { + case ISD::AND: + case ISD::XOR: + case ISD::OR: { + SDValue Op0 = Src.getOperand(0); + SDValue Op1 = Src.getOperand(1); + if (TLI.isOperationLegalOrPromote(Opcode, VT) && + IsRepeatedOpOrFreeTruncation(Op0, Op1)) + return TruncateArithmetic(Op0, Op1); + break; + } + + case ISD::MUL: + // X86 is rubbish at scalar and vector i64 multiplies (until AVX512DQ) - it's + // better to truncate if we have the chance. + if (SrcVT.getScalarType() == MVT::i64 && TLI.isOperationLegal(Opcode, VT) && + !TLI.isOperationLegal(Opcode, SrcVT)) + return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1)); + LLVM_FALLTHROUGH; + case ISD::ADD: { + SDValue Op0 = Src.getOperand(0); + SDValue Op1 = Src.getOperand(1); + if (TLI.isOperationLegal(Opcode, VT) && + IsRepeatedOpOrFreeTruncation(Op0, Op1)) + return TruncateArithmetic(Op0, Op1); + break; + } + } + + return SDValue(); +} + /// Truncate a group of v4i32 into v16i8/v8i16 using X86ISD::PACKUS. static SDValue combineVectorTruncationWithPACKUS(SDNode *N, SelectionDAG &DAG, @@ -31609,7 +32876,8 @@ combineVectorTruncationWithPACKUS(SDNode *N, SelectionDAG &DAG, /// Truncate a group of v4i32 into v8i16 using X86ISD::PACKSS. static SDValue -combineVectorTruncationWithPACKSS(SDNode *N, SelectionDAG &DAG, +combineVectorTruncationWithPACKSS(SDNode *N, const X86Subtarget &Subtarget, + SelectionDAG &DAG, SmallVector<SDValue, 8> &Regs) { assert(Regs.size() > 0 && Regs[0].getValueType() == MVT::v4i32); EVT OutVT = N->getValueType(0); @@ -31618,8 +32886,10 @@ combineVectorTruncationWithPACKSS(SDNode *N, SelectionDAG &DAG, // Shift left by 16 bits, then arithmetic-shift right by 16 bits. SDValue ShAmt = DAG.getConstant(16, DL, MVT::i32); for (auto &Reg : Regs) { - Reg = getTargetVShiftNode(X86ISD::VSHLI, DL, MVT::v4i32, Reg, ShAmt, DAG); - Reg = getTargetVShiftNode(X86ISD::VSRAI, DL, MVT::v4i32, Reg, ShAmt, DAG); + Reg = getTargetVShiftNode(X86ISD::VSHLI, DL, MVT::v4i32, Reg, ShAmt, + Subtarget, DAG); + Reg = getTargetVShiftNode(X86ISD::VSRAI, DL, MVT::v4i32, Reg, ShAmt, + Subtarget, DAG); } for (unsigned i = 0, e = Regs.size() / 2; i < e; i++) @@ -31688,7 +32958,7 @@ static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG, if (Subtarget.hasSSE41() || OutSVT == MVT::i8) return combineVectorTruncationWithPACKUS(N, DAG, SubVec); else if (InSVT == MVT::i32) - return combineVectorTruncationWithPACKSS(N, DAG, SubVec); + return combineVectorTruncationWithPACKSS(N, Subtarget, DAG, SubVec); else return SDValue(); } @@ -31738,10 +33008,18 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG, SDValue Src = N->getOperand(0); SDLoc DL(N); + // Attempt to pre-truncate inputs to arithmetic ops instead. + if (SDValue V = combineTruncatedArithmetic(N, DAG, Subtarget, DL)) + return V; + // Try to detect AVG pattern first. if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL)) return Avg; + // Try to combine truncation with unsigned saturation. + if (SDValue Val = combineTruncateWithUSat(Src, VT, DL, DAG, Subtarget)) + return Val; + // The bitcast source is a direct mmx result. // Detect bitcasts between i32 to x86mmx if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) { @@ -31863,23 +33141,24 @@ static SDValue combineFneg(SDNode *N, SelectionDAG &DAG, static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { - EVT VT = N->getValueType(0); - if (VT.is512BitVector() && !Subtarget.hasDQI()) { - // VXORPS, VORPS, VANDPS, VANDNPS are supported only under DQ extension.
- // These logic operations may be executed in the integer domain. + MVT VT = N->getSimpleValueType(0); + // If we have integer vector types available, use the integer opcodes. + if (VT.isVector() && Subtarget.hasSSE2()) { SDLoc dl(N); - SDValue Op0 = DAG.getBitcast(MVT::v8i64, N->getOperand(0)); - SDValue Op1 = DAG.getBitcast(MVT::v8i64, N->getOperand(1)); - unsigned IntOpcode = 0; + MVT IntVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64); + + SDValue Op0 = DAG.getBitcast(IntVT, N->getOperand(0)); + SDValue Op1 = DAG.getBitcast(IntVT, N->getOperand(1)); + unsigned IntOpcode; switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected FP logic op"); - case X86ISD::FOR: IntOpcode = ISD::OR; break; - case X86ISD::FXOR: IntOpcode = ISD::XOR; break; - case X86ISD::FAND: IntOpcode = ISD::AND; break; - case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break; + default: llvm_unreachable("Unexpected FP logic op"); + case X86ISD::FOR: IntOpcode = ISD::OR; break; + case X86ISD::FXOR: IntOpcode = ISD::XOR; break; + case X86ISD::FAND: IntOpcode = ISD::AND; break; + case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break; } - SDValue IntOp = DAG.getNode(IntOpcode, dl, MVT::v8i64, Op0, Op1); + SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1); return DAG.getBitcast(VT, IntOp); } return SDValue(); @@ -32087,6 +33366,34 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG, return DAG.getNode(SelectOpcode, DL, VT, IsOp0Nan, Op1, MinOrMax); } +/// Do target-specific dag combines on X86ISD::ANDNP nodes. +static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget &Subtarget) { + // ANDNP(0, x) -> x + if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode())) + return N->getOperand(1); + + // ANDNP(x, 0) -> 0 + if (ISD::isBuildVectorAllZeros(N->getOperand(1).getNode())) + return getZeroVector(N->getSimpleValueType(0), Subtarget, DAG, SDLoc(N)); + + EVT VT = N->getValueType(0); + + // Attempt to recursively combine a bitmask ANDNP with shuffles. + if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) { + SDValue Op(N, 0); + SmallVector<int, 1> NonceMask; // Just a placeholder. + NonceMask.push_back(0); + if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, {}, + /*Depth*/ 1, /*HasVarMask*/ false, DAG, + DCI, Subtarget)) + return SDValue(); // This routine will use CombineTo to replace N. + } + + return SDValue(); +} + static SDValue combineBT(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { // BT ignores high bits in the bit index operand. @@ -32348,13 +33655,22 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG, if (!DCI.isBeforeLegalizeOps()) { if (InVT == MVT::i1) { SDValue Zero = DAG.getConstant(0, DL, VT); - SDValue AllOnes = - DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT); + SDValue AllOnes = DAG.getAllOnesConstant(DL, VT); return DAG.getNode(ISD::SELECT, DL, VT, N0, AllOnes, Zero); } return SDValue(); } + if (InVT == MVT::i1 && N0.getOpcode() == ISD::XOR && + isAllOnesConstant(N0.getOperand(1)) && N0.hasOneUse()) { + // Inverting and sign-extending a boolean is the same as zero-extending it + // and subtracting 1, because 0 becomes -1 and 1 becomes 0. The subtract is + // efficiently lowered with an LEA or a DEC. This is the same as: select Bool, 0, -1.
+ // sext (xor Bool, -1) --> sub (zext Bool), 1 + SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); + return DAG.getNode(ISD::SUB, DL, VT, Zext, DAG.getConstant(1, DL, VT)); + } + if (SDValue V = combineToExtendVectorInReg(N, DAG, DCI, Subtarget)) return V; @@ -32576,27 +33892,23 @@ static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG) { return SDValue(); } -// Helper function of performSETCCCombine. It is to materialize "setb reg" -// as "sbb reg,reg", since it can be extended without zext and produces -// an all-ones bit which is more useful than 0/1 in some cases. -static SDValue MaterializeSETB(const SDLoc &DL, SDValue EFLAGS, - SelectionDAG &DAG, MVT VT) { +/// Materialize "setb reg" as "sbb reg,reg", since it produces an all-ones bit +/// which is more useful than 0/1 in some cases. +static SDValue materializeSBB(SDNode *N, SDValue EFLAGS, SelectionDAG &DAG) { + SDLoc DL(N); + // "Condition code B" is also known as "the carry flag" (CF). + SDValue CF = DAG.getConstant(X86::COND_B, DL, MVT::i8); + SDValue SBB = DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8, CF, EFLAGS); + MVT VT = N->getSimpleValueType(0); if (VT == MVT::i8) - return DAG.getNode(ISD::AND, DL, VT, - DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8, - DAG.getConstant(X86::COND_B, DL, MVT::i8), - EFLAGS), - DAG.getConstant(1, DL, VT)); - assert (VT == MVT::i1 && "Unexpected type for SECCC node"); - return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, - DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8, - DAG.getConstant(X86::COND_B, DL, MVT::i8), - EFLAGS)); + return DAG.getNode(ISD::AND, DL, VT, SBB, DAG.getConstant(1, DL, VT)); + + assert(VT == MVT::i1 && "Unexpected type for SETCC node"); + return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SBB); } // Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT static SDValue combineX86SetCC(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { SDLoc DL(N); X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0)); @@ -32616,15 +33928,12 @@ static SDValue combineX86SetCC(SDNode *N, SelectionDAG &DAG, EFLAGS.getNode()->getVTList(), EFLAGS.getOperand(1), EFLAGS.getOperand(0)); SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo()); - return MaterializeSETB(DL, NewEFLAGS, DAG, N->getSimpleValueType(0)); + return materializeSBB(N, NewEFLAGS, DAG); } } - // Materialize "setb reg" as "sbb reg,reg", since it can be extended without - // a zext and produces an all-ones bit which is more useful than 0/1 in some - // cases. if (CC == X86::COND_B) - return MaterializeSETB(DL, EFLAGS, DAG, N->getSimpleValueType(0)); + return materializeSBB(N, EFLAGS, DAG); // Try to simplify the EFLAGS and condition code operands. if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG)) @@ -32635,7 +33944,6 @@ static SDValue combineX86SetCC(SDNode *N, SelectionDAG &DAG, /// Optimize branch condition evaluation. 
static SDValue combineBrCond(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { SDLoc DL(N); SDValue EFLAGS = N->getOperand(3); @@ -32821,45 +34129,61 @@ static SDValue combineADC(SDNode *N, SelectionDAG &DAG, return SDValue(); } -/// fold (add Y, (sete X, 0)) -> adc 0, Y -/// (add Y, (setne X, 0)) -> sbb -1, Y -/// (sub (sete X, 0), Y) -> sbb 0, Y -/// (sub (setne X, 0), Y) -> adc -1, Y -static SDValue OptimizeConditionalInDecrement(SDNode *N, SelectionDAG &DAG) { - SDLoc DL(N); +/// If this is an add or subtract where one operand is produced by a cmp+setcc, +/// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB} +/// with CMP+{ADC, SBB}. +static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) { + bool IsSub = N->getOpcode() == ISD::SUB; + SDValue X = N->getOperand(0); + SDValue Y = N->getOperand(1); - // Look through ZExts. - SDValue Ext = N->getOperand(N->getOpcode() == ISD::SUB ? 1 : 0); - if (Ext.getOpcode() != ISD::ZERO_EXTEND || !Ext.hasOneUse()) - return SDValue(); + // If this is an add, canonicalize a zext operand to the RHS. + // TODO: Incomplete? What if both sides are zexts? + if (!IsSub && X.getOpcode() == ISD::ZERO_EXTEND && + Y.getOpcode() != ISD::ZERO_EXTEND) + std::swap(X, Y); + + // Look through a one-use zext. + if (Y.getOpcode() == ISD::ZERO_EXTEND && Y.hasOneUse()) + Y = Y.getOperand(0); - SDValue SetCC = Ext.getOperand(0); - if (SetCC.getOpcode() != X86ISD::SETCC || !SetCC.hasOneUse()) + // If this is an add, canonicalize a setcc operand to the RHS. + // TODO: Incomplete? What if both sides are setcc? + if (!IsSub && X.getOpcode() == X86ISD::SETCC && + Y.getOpcode() != X86ISD::SETCC) + std::swap(X, Y); + + if (Y.getOpcode() != X86ISD::SETCC || !Y.hasOneUse()) return SDValue(); - X86::CondCode CC = (X86::CondCode)SetCC.getConstantOperandVal(0); + X86::CondCode CC = (X86::CondCode)Y.getConstantOperandVal(0); if (CC != X86::COND_E && CC != X86::COND_NE) return SDValue(); - SDValue Cmp = SetCC.getOperand(1); + SDValue Cmp = Y.getOperand(1); if (Cmp.getOpcode() != X86ISD::CMP || !Cmp.hasOneUse() || !X86::isZeroNode(Cmp.getOperand(1)) || !Cmp.getOperand(0).getValueType().isInteger()) return SDValue(); - SDValue CmpOp0 = Cmp.getOperand(0); - SDValue NewCmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpOp0, - DAG.getConstant(1, DL, CmpOp0.getValueType())); + SDLoc DL(N); + EVT VT = N->getValueType(0); + + // (cmp Z, 1) sets the carry flag if Z is 0. + SDValue Z = Cmp.getOperand(0); + SDValue NewCmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Z, + DAG.getConstant(1, DL, Z.getValueType())); - SDValue OtherVal = N->getOperand(N->getOpcode() == ISD::SUB ? 0 : 1); + // X - (Z != 0) --> sub X, (zext(setne Z, 0)) --> adc X, -1, (cmp Z, 1) + // X + (Z != 0) --> add X, (zext(setne Z, 0)) --> sbb X, -1, (cmp Z, 1) if (CC == X86::COND_NE) - return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::ADC : X86ISD::SBB, - DL, OtherVal.getValueType(), OtherVal, - DAG.getConstant(-1ULL, DL, OtherVal.getValueType()), - NewCmp); - return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::SBB : X86ISD::ADC, - DL, OtherVal.getValueType(), OtherVal, - DAG.getConstant(0, DL, OtherVal.getValueType()), NewCmp); + return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL, VT, X, + DAG.getConstant(-1ULL, DL, VT), NewCmp); + + // X - (Z == 0) --> sub X, (zext(sete Z, 0)) --> sbb X, 0, (cmp Z, 1) + // X + (Z == 0) --> add X, (zext(sete Z, 0)) --> adc X, 0, (cmp Z, 1) + return DAG.getNode(IsSub ? 
X86ISD::SBB : X86ISD::ADC, DL, VT, X, + DAG.getConstant(0, DL, VT), NewCmp); } static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG, @@ -32950,7 +34274,7 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, isHorizontalBinOp(Op0, Op1, true)) return DAG.getNode(X86ISD::HADD, SDLoc(N), VT, Op0, Op1); - return OptimizeConditionalInDecrement(N, DAG); + return combineAddOrSubToADCOrSBB(N, DAG); } static SDValue combineSub(SDNode *N, SelectionDAG &DAG, @@ -32976,43 +34300,51 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG, } } - // Try to synthesize horizontal adds from adds of shuffles. + // Try to synthesize horizontal subs from subs of shuffles. EVT VT = N->getValueType(0); if (((Subtarget.hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) || (Subtarget.hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) && - isHorizontalBinOp(Op0, Op1, true)) + isHorizontalBinOp(Op0, Op1, false)) return DAG.getNode(X86ISD::HSUB, SDLoc(N), VT, Op0, Op1); - return OptimizeConditionalInDecrement(N, DAG); + return combineAddOrSubToADCOrSBB(N, DAG); } static SDValue combineVSZext(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { + if (DCI.isBeforeLegalize()) + return SDValue(); + SDLoc DL(N); unsigned Opcode = N->getOpcode(); MVT VT = N->getSimpleValueType(0); MVT SVT = VT.getVectorElementType(); + unsigned NumElts = VT.getVectorNumElements(); + unsigned EltSizeInBits = SVT.getSizeInBits(); + SDValue Op = N->getOperand(0); MVT OpVT = Op.getSimpleValueType(); MVT OpEltVT = OpVT.getVectorElementType(); - unsigned InputBits = OpEltVT.getSizeInBits() * VT.getVectorNumElements(); + unsigned OpEltSizeInBits = OpEltVT.getSizeInBits(); + unsigned InputBits = OpEltSizeInBits * NumElts; // Perform any constant folding. // FIXME: Reduce constant pool usage and don't fold when OptSize is enabled. - if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) { - unsigned NumDstElts = VT.getVectorNumElements(); - SmallBitVector Undefs(NumDstElts, false); - SmallVector<APInt, 32> Vals(NumDstElts, APInt(SVT.getSizeInBits(), 0)); - for (unsigned i = 0; i != NumDstElts; ++i) { - SDValue OpElt = Op.getOperand(i); - if (OpElt.getOpcode() == ISD::UNDEF) { - Undefs[i] = true; + APInt UndefElts; + SmallVector<APInt, 32> EltBits; + if (getTargetConstantBitsFromNode(Op, OpEltSizeInBits, UndefElts, EltBits)) { + APInt Undefs(NumElts, 0); + SmallVector<APInt, 32> Vals(NumElts, APInt(EltSizeInBits, 0)); + bool IsZEXT = + (Opcode == X86ISD::VZEXT) || (Opcode == ISD::ZERO_EXTEND_VECTOR_INREG); + for (unsigned i = 0; i != NumElts; ++i) { + if (UndefElts[i]) { + Undefs.setBit(i); continue; } - APInt Cst = cast<ConstantSDNode>(OpElt.getNode())->getAPIntValue(); - Vals[i] = Opcode == X86ISD::VZEXT ? Cst.zextOrTrunc(SVT.getSizeInBits()) - : Cst.sextOrTrunc(SVT.getSizeInBits()); + Vals[i] = IsZEXT ?
EltBits[i].zextOrTrunc(EltSizeInBits) + : EltBits[i].sextOrTrunc(EltSizeInBits); } return getConstVector(Vals, Undefs, VT, DAG, DL); } @@ -33120,6 +34452,98 @@ static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget &Subtarget) { + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + SDLoc dl(N); + SDValue Vec = N->getOperand(0); + SDValue SubVec = N->getOperand(1); + SDValue Idx = N->getOperand(2); + + unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + MVT OpVT = N->getSimpleValueType(0); + MVT SubVecVT = SubVec.getSimpleValueType(); + + // If this is an insert of an extract, combine to a shuffle. Don't do this + // if the insert or extract can be represented with a subvector operation. + if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR && + SubVec.getOperand(0).getSimpleValueType() == OpVT && + (IdxVal != 0 || !Vec.isUndef())) { + int ExtIdxVal = cast<ConstantSDNode>(SubVec.getOperand(1))->getZExtValue(); + if (ExtIdxVal != 0) { + int VecNumElts = OpVT.getVectorNumElements(); + int SubVecNumElts = SubVecVT.getVectorNumElements(); + SmallVector<int, 64> Mask(VecNumElts); + // First create an identity shuffle mask. + for (int i = 0; i != VecNumElts; ++i) + Mask[i] = i; + // Now insert the extracted portion. + for (int i = 0; i != SubVecNumElts; ++i) + Mask[i + IdxVal] = i + ExtIdxVal + VecNumElts; + + return DAG.getVectorShuffle(OpVT, dl, Vec, SubVec.getOperand(0), Mask); + } + } + + // Fold two 16-byte or 32-byte subvector loads into one 32-byte or 64-byte + // load: + // (insert_subvector (insert_subvector undef, (load16 addr), 0), + // (load16 addr + 16), Elts/2) + // --> load32 addr + // or: + // (insert_subvector (insert_subvector undef, (load32 addr), 0), + // (load32 addr + 32), Elts/2) + // --> load64 addr + // or a 16-byte or 32-byte broadcast: + // (insert_subvector (insert_subvector undef, (load16 addr), 0), + // (load16 addr), Elts/2) + // --> X86SubVBroadcast(load16 addr) + // or: + // (insert_subvector (insert_subvector undef, (load32 addr), 0), + // (load32 addr), Elts/2) + // --> X86SubVBroadcast(load32 addr) + if ((IdxVal == OpVT.getVectorNumElements() / 2) && + Vec.getOpcode() == ISD::INSERT_SUBVECTOR && + OpVT.getSizeInBits() == SubVecVT.getSizeInBits() * 2) { + auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2)); + if (Idx2 && Idx2->getZExtValue() == 0) { + SDValue SubVec2 = Vec.getOperand(1); + // If needed, look through bitcasts to get to the load. + if (auto *FirstLd = dyn_cast<LoadSDNode>(peekThroughBitcasts(SubVec2))) { + bool Fast; + unsigned Alignment = FirstLd->getAlignment(); + unsigned AS = FirstLd->getAddressSpace(); + const X86TargetLowering *TLI = Subtarget.getTargetLowering(); + if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), + OpVT, AS, Alignment, &Fast) && Fast) { + SDValue Ops[] = {SubVec2, SubVec}; + if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false)) + return Ld; + } + } + // If lower/upper loads are the same and the only users of the load, then + // lower to a VBROADCASTF128/VBROADCASTI128/etc. + if (auto *Ld = dyn_cast<LoadSDNode>(peekThroughOneUseBitcasts(SubVec2))) { + if (SubVec2 == SubVec && ISD::isNormalLoad(Ld) && + SDNode::areOnlyUsersOf({N, Vec.getNode()}, SubVec2.getNode())) { + return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, SubVec); + } + } + // If this is subv_broadcast insert into both halves, use a larger + // subv_broadcast.
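+ // e.g. (insert_subvector (insert_subvector undef, (subv_broadcast addr), 0), + // (subv_broadcast addr), Elts/2) + // --> subv_broadcast(addr) at the wider type (illustrative sketch; addr is + // a hypothetical source operand).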
+ if (SubVec.getOpcode() == X86ISD::SUBV_BROADCAST && SubVec == SubVec2) { + return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, + SubVec.getOperand(0)); + } + } + } + + return SDValue(); +} + SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { @@ -33128,6 +34552,11 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, default: break; case ISD::EXTRACT_VECTOR_ELT: return combineExtractVectorElt(N, DAG, DCI, Subtarget); + case X86ISD::PEXTRW: + case X86ISD::PEXTRB: + return combineExtractVectorElt_SSE(N, DAG, DCI, Subtarget); + case ISD::INSERT_SUBVECTOR: + return combineInsertSubvector(N, DAG, DCI, Subtarget); case ISD::VSELECT: case ISD::SELECT: case X86ISD::SHRUNKBLEND: return combineSelect(N, DAG, DCI, Subtarget); @@ -33153,6 +34582,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget); case ISD::FNEG: return combineFneg(N, DAG, Subtarget); case ISD::TRUNCATE: return combineTruncate(N, DAG, Subtarget); + case X86ISD::ANDNP: return combineAndnp(N, DAG, DCI, Subtarget); case X86ISD::FAND: return combineFAnd(N, DAG, Subtarget); case X86ISD::FANDN: return combineFAndn(N, DAG, Subtarget); case X86ISD::FXOR: @@ -33167,12 +34597,17 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::SIGN_EXTEND: return combineSext(N, DAG, DCI, Subtarget); case ISD::SIGN_EXTEND_INREG: return combineSignExtendInReg(N, DAG, Subtarget); case ISD::SETCC: return combineSetCC(N, DAG, Subtarget); - case X86ISD::SETCC: return combineX86SetCC(N, DAG, DCI, Subtarget); - case X86ISD::BRCOND: return combineBrCond(N, DAG, DCI, Subtarget); + case X86ISD::SETCC: return combineX86SetCC(N, DAG, Subtarget); + case X86ISD::BRCOND: return combineBrCond(N, DAG, Subtarget); case X86ISD::VSHLI: + case X86ISD::VSRAI: case X86ISD::VSRLI: return combineVectorShift(N, DAG, DCI, Subtarget); + case ISD::SIGN_EXTEND_VECTOR_INREG: + case ISD::ZERO_EXTEND_VECTOR_INREG: case X86ISD::VSEXT: case X86ISD::VZEXT: return combineVSZext(N, DAG, DCI, Subtarget); + case X86ISD::PINSRB: + case X86ISD::PINSRW: return combineVectorInsert(N, DAG, DCI, Subtarget); case X86ISD::SHUFP: // Handle all target specific shuffles case X86ISD::INSERTPS: case X86ISD::PALIGNR: diff --git a/lib/Target/Z80/Z80ISelDAGToDAG.cpp b/lib/Target/Z80/Z80ISelDAGToDAG.cpp index f45d063..46e1bd9 100644 --- a/lib/Target/Z80/Z80ISelDAGToDAG.cpp +++ b/lib/Target/Z80/Z80ISelDAGToDAG.cpp @@ -77,14 +77,7 @@ void Z80DAGToDAGISel::Select(SDNode *Node) { } // Select the default instruction - SDNode *ResNode = SelectCode(Node); - - DEBUG(dbgs() << "=> "; - if (ResNode == nullptr || ResNode == Node) - Node->dump(CurDAG); - else - ResNode->dump(CurDAG); - dbgs() << '\n'); + SelectCode(Node); } bool Z80DAGToDAGISel::SelectMem(SDValue N, SDValue &Mem) { diff --git a/lib/Target/Z80/Z80MCInstLower.cpp b/lib/Target/Z80/Z80MCInstLower.cpp index 38b0de4..aeba7d4 100644 --- a/lib/Target/Z80/Z80MCInstLower.cpp +++ b/lib/Target/Z80/Z80MCInstLower.cpp @@ -73,7 +73,9 @@ Z80MCInstLower::LowerMachineOperand(const MachineInstr *MI, const MachineOperand &MO) const { switch (MO.getType()) { default: +#ifndef NDEBUG MI->dump(); +#endif llvm_unreachable("unknown operand type"); case MachineOperand::MO_Register: return MCOperand::createReg(MO.getReg()); diff --git a/test/CodeGen/Hexagon/adde.ll b/test/CodeGen/Hexagon/adde.ll index 24499aa..afc11ac 100644 --- a/test/CodeGen/Hexagon/adde.ll +++ b/test/CodeGen/Hexagon/adde.ll @@ -1,30 +1,25 @@ -; RUN: llc -march=hexagon -disable-hsdr 
-hexagon-expand-condsets=0 -hexagon-bit=0 -disable-post-ra < %s | FileCheck %s +; RUN: llc -march=hexagon -hexagon-expand-condsets=0 < %s | FileCheck %s -; CHECK-DAG: r[[HI0:[0-9]+]]:[[LO0:[0-9]+]] = combine(#0, #0) -; CHECK-DAG: r[[HI1:[0-9]+]]:[[LO1:[0-9]+]] = combine(#0, #1) -; CHECK: r[[LOSUM:[0-9:]+]] = add(r5:4, r1:0) -; CHECK: p[[CARRYFLAG:[0-9]+]] = cmp.gtu(r1:0, r5:4) -; CHECK: r[[LOSUM:[0-9:]+]] = add(r{{[0-9:]+}}, r3:2) -; CHECK: r[[LOCARRY:[0-9]+]] = mux(p[[CARRYFLAG]], r[[LO1]], r[[LO0]]) -; CHECK: r[[HICARRY:[0-9]+]] = mux(p[[CARRYFLAG]], r[[HI1]], r[[HI0]]) -; CHECK: r[[CARRY:[0-9:]+]] = combine(r[[HICARRY]], r[[LOCARRY]]) -; CHECK: r[[HISUM:[0-9:]+]] = add(r3:2, r[[CARRY]]) +; CHECK-DAG: r{{[0-9]+:[0-9]+}} = add(r{{[0-9]+:[0-9]+}},r{{[0-9]+:[0-9]+}}) +; CHECK-DAG: r{{[0-9]+:[0-9]+}} = add(r{{[0-9]+:[0-9]+}},r{{[0-9]+:[0-9]+}}) +; CHECK-DAG: p{{[0-9]+}} = cmp.gtu(r{{[0-9]+:[0-9]+}},r{{[0-9]+:[0-9]+}}) +; CHECK-DAG: r{{[0-9]+}} = mux(p{{[0-9]+}},r{{[0-9]+}},r{{[0-9]+}}) -define void @check_adde_addc (i64 %AL, i64 %AH, i64 %BL, i64 %BH, i64* %RL, i64* %RH) { -entry: - %tmp1 = zext i64 %AL to i128 - %tmp23 = zext i64 %AH to i128 - %tmp4 = shl i128 %tmp23, 64 - %tmp5 = or i128 %tmp4, %tmp1 - %tmp67 = zext i64 %BL to i128 - %tmp89 = zext i64 %BH to i128 - %tmp11 = shl i128 %tmp89, 64 - %tmp12 = or i128 %tmp11, %tmp67 - %tmp15 = add i128 %tmp12, %tmp5 - %tmp1617 = trunc i128 %tmp15 to i64 - store i64 %tmp1617, i64* %RL - %tmp21 = lshr i128 %tmp15, 64 - %tmp2122 = trunc i128 %tmp21 to i64 - store i64 %tmp2122, i64* %RH - ret void +define void @check_adde_addc(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64* %a4, i64* %a5) { +b6: + %v7 = zext i64 %a0 to i128 + %v8 = zext i64 %a1 to i128 + %v9 = shl i128 %v8, 64 + %v10 = or i128 %v7, %v9 + %v11 = zext i64 %a2 to i128 + %v12 = zext i64 %a3 to i128 + %v13 = shl i128 %v12, 64 + %v14 = or i128 %v11, %v13 + %v15 = add i128 %v10, %v14 + %v16 = lshr i128 %v15, 64 + %v17 = trunc i128 %v15 to i64 + %v18 = trunc i128 %v16 to i64 + store i64 %v17, i64* %a4 + store i64 %v18, i64* %a5 + ret void } diff --git a/test/CodeGen/MSP430/Inst8rr.ll b/test/CodeGen/MSP430/Inst8rr.ll index d233ebc..9eaa722 100644 --- a/test/CodeGen/MSP430/Inst8rr.ll +++ b/test/CodeGen/MSP430/Inst8rr.ll @@ -4,7 +4,7 @@ target triple = "msp430-generic-generic" define i8 @mov(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: mov: -; CHECK: mov.{{[bw]}} r14, r15 +; CHECK: mov.{{[bw]}} r13, r12 ret i8 %b } @@ -17,21 +17,21 @@ define i8 @add(i8 %a, i8 %b) nounwind { define i8 @and(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: and: -; CHECK: and.{{[bw]}} r14, r15 +; CHECK: and.{{[bw]}} r13, r12 %1 = and i8 %a, %b ret i8 %1 } define i8 @bis(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: bis: -; CHECK: bis.{{[bw]}} r14, r15 +; CHECK: bis.{{[bw]}} r13, r12 %1 = or i8 %a, %b ret i8 %1 } define i8 @bic(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: bic: -; CHECK: bic.{{[bw]}} r14, r15 +; CHECK: bic.{{[bw]}} r13, r12 %1 = xor i8 %b, -1 %2 = and i8 %a, %1 ret i8 %2 @@ -39,7 +39,7 @@ define i8 @bic(i8 %a, i8 %b) nounwind { define i8 @xor(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: xor: -; CHECK: xor.{{[bw]}} r14, r15 +; CHECK: xor.{{[bw]}} r13, r12 %1 = xor i8 %a, %b ret i8 %1 } diff --git a/test/CodeGen/Mips/dynamic-stack-realignment.ll b/test/CodeGen/Mips/dynamic-stack-realignment.ll new file mode 100644 index 0000000..9d8c27a --- /dev/null +++ b/test/CodeGen/Mips/dynamic-stack-realignment.ll @@ -0,0 +1,299 @@ +; RUN: llc < %s -march=mips -mcpu=mips2 -relocation-model=pic | FileCheck %s \ +; RUN: --check-prefixes=ALL,GP32 +; 
RUN: llc < %s -march=mips -mcpu=mips32 -relocation-model=pic | FileCheck %s \ +; RUN: --check-prefixes=ALL,GP32 +; RUN: llc < %s -march=mips -mcpu=mips32r6 -relocation-model=pic | FileCheck %s \ +; RUN: --check-prefixes=ALL,GP32 +; RUN: llc < %s -march=mips64 -mcpu=mips3 -relocation-model=pic | FileCheck %s \ +; RUN: --check-prefixes=ALL,GP64,N64 +; RUN: llc < %s -march=mips64 -mcpu=mips64 -relocation-model=pic | FileCheck %s \ +; RUN: --check-prefixes=ALL,GP64,N64 +; RUN: llc < %s -march=mips64 -mcpu=mips64r6 -relocation-model=pic | FileCheck %s \ +; RUN: --check-prefixes=ALL,GP64,N64 +; RUN: llc < %s -march=mips64 -mcpu=mips3 -target-abi n32 -relocation-model=pic | FileCheck %s \ +; RUN: --check-prefixes=ALL,GP64,N32 +; RUN: llc < %s -march=mips64 -mcpu=mips64 -target-abi n32 -relocation-model=pic | FileCheck %s \ +; RUN: --check-prefixes=ALL,GP64,N32 +; RUN: llc < %s -march=mips64 -mcpu=mips64r6 -target-abi n32 -relocation-model=pic | FileCheck %s \ +; RUN: --check-prefixes=ALL,GP64,N32 + +; Check dynamic stack realignment in functions without variable-sized objects. + +declare void @helper_01(i32, i32, i32, i32, i32*) + +; O32 ABI +define void @func_01() { +entry: +; GP32-LABEL: func_01: + + ; prologue + ; FIXME: We are currently over-allocating stack space. This particular case + ; needs a frame of up to between 16 and 512-bytes but currently + ; allocates between 1024 and 1536 bytes + ; GP32: addiu $sp, $sp, -1024 + ; GP32: sw $ra, 1020($sp) + ; GP32: sw $fp, 1016($sp) + ; + ; GP32: move $fp, $sp + ; GP32: addiu $[[T0:[0-9]+|ra|gp]], $zero, -512 + ; GP32-NEXT: and $sp, $sp, $[[T0]] + + ; body + ; GP32: addiu $[[T1:[0-9]+]], $sp, 512 + ; GP32: sw $[[T1]], 16($sp) + + ; epilogue + ; GP32: move $sp, $fp + ; GP32: lw $fp, 1016($sp) + ; GP32: lw $ra, 1020($sp) + ; GP32: addiu $sp, $sp, 1024 + + %a = alloca i32, align 512 + call void @helper_01(i32 0, i32 0, i32 0, i32 0, i32* %a) + ret void +} + +declare void @helper_02(i32, i32, i32, i32, + i32, i32, i32, i32, i32*) + +; N32/N64 ABIs +define void @func_02() { +entry: +; GP64-LABEL: func_02: + + ; prologue + ; FIXME: We are currently over-allocating stack space. This particular case + ; needs a frame of up to between 16 and 512-bytes but currently + ; allocates between 1024 and 1536 bytes + ; N32: addiu $sp, $sp, -1024 + ; N64: daddiu $sp, $sp, -1024 + ; GP64: sd $ra, 1016($sp) + ; GP64: sd $fp, 1008($sp) + ; N32: sd $gp, 1000($sp) + ; + ; GP64: move $fp, $sp + ; N32: addiu $[[T0:[0-9]+|ra]], $zero, -512 + ; N64: daddiu $[[T0:[0-9]+|ra]], $zero, -512 + ; GP64-NEXT: and $sp, $sp, $[[T0]] + + ; body + ; N32: addiu $[[T1:[0-9]+]], $sp, 512 + ; N64: daddiu $[[T1:[0-9]+]], $sp, 512 + ; GP64: sd $[[T1]], 0($sp) + + ; epilogue + ; GP64: move $sp, $fp + ; N32: ld $gp, 1000($sp) + ; GP64: ld $fp, 1008($sp) + ; GP64: ld $ra, 1016($sp) + ; N32: addiu $sp, $sp, 1024 + ; N64: daddiu $sp, $sp, 1024 + + %a = alloca i32, align 512 + call void @helper_02(i32 0, i32 0, i32 0, i32 0, + i32 0, i32 0, i32 0, i32 0, i32* %a) + ret void +} + +; Verify that we use $fp for referencing incoming arguments. + +declare void @helper_03(i32, i32, i32, i32, i32*, i32*) + +; O32 ABI +define void @func_03(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32* %b) { +entry: +; GP32-LABEL: func_03: + + ; body + ; FIXME: We are currently over-allocating stack space. 
+ ; GP32-DAG: addiu $[[T0:[0-9]+]], $sp, 512 + ; GP32-DAG: sw $[[T0]], 16($sp) + ; GP32-DAG: lw $[[T1:[0-9]+]], 1040($fp) + ; GP32-DAG: sw $[[T1]], 20($sp) + + %a = alloca i32, align 512 + call void @helper_03(i32 0, i32 0, i32 0, i32 0, i32* %a, i32* %b) + ret void +} + +declare void @helper_04(i32, i32, i32, i32, + i32, i32, i32, i32, i32*, i32*) + +; N32/N64 ABIs +define void @func_04(i32 %p0, i32 %p1, i32 %p2, i32 %p3, + i32 %p4, i32 %p5, i32 %p6, i32 %p7, + i32* %b) { +entry: +; GP64-LABEL: func_04: + + ; body + ; FIXME: We are currently over-allocating stack space. + ; N32-DAG: addiu $[[T0:[0-9]+]], $sp, 512 + ; N64-DAG: daddiu $[[T0:[0-9]+]], $sp, 512 + ; GP64-DAG: sd $[[T0]], 0($sp) + ; GP64-DAG: l{{[dw]}} $[[T1:[0-9]+]], 102{{[48]}}($fp) + ; GP64-DAG: sd $[[T1]], 8($sp) + + %a = alloca i32, align 512 + call void @helper_04(i32 0, i32 0, i32 0, i32 0, + i32 0, i32 0, i32 0, i32 0, i32* %a, i32* %b) + ret void +} + +; Check dynamic stack realignment in functions with variable-sized objects. + +; O32 ABI +define void @func_05(i32 %sz) { +entry: +; GP32-LABEL: func_05: + + ; prologue + ; FIXME: We are currently over-allocating stack space. + ; GP32: addiu $sp, $sp, -1024 + ; GP32: sw $fp, 1020($sp) + ; GP32: sw $23, 1016($sp) + ; + ; GP32: move $fp, $sp + ; GP32: addiu $[[T0:[0-9]+|gp]], $zero, -512 + ; GP32-NEXT: and $sp, $sp, $[[T0]] + ; GP32-NEXT: move $23, $sp + + ; body + ; GP32: addiu $[[T1:[0-9]+]], $zero, 222 + ; GP32: sw $[[T1]], 508($23) + + ; epilogue + ; GP32: move $sp, $fp + ; GP32: lw $23, 1016($sp) + ; GP32: lw $fp, 1020($sp) + ; GP32: addiu $sp, $sp, 1024 + + %a0 = alloca i32, i32 %sz, align 512 + %a1 = alloca i32, align 4 + + store volatile i32 111, i32* %a0, align 512 + store volatile i32 222, i32* %a1, align 4 + + ret void +} + +; N32/N64 ABIs +define void @func_06(i32 %sz) { +entry: +; GP64-LABEL: func_06: + + ; prologue + ; FIXME: We are currently over-allocating stack space. + ; N32: addiu $sp, $sp, -1024 + ; N64: daddiu $sp, $sp, -1024 + ; GP64: sd $fp, 1016($sp) + ; GP64: sd $23, 1008($sp) + ; + ; GP64: move $fp, $sp + ; GP64: addiu $[[T0:[0-9]+|gp]], $zero, -512 + ; GP64-NEXT: and $sp, $sp, $[[T0]] + ; GP64-NEXT: move $23, $sp + + ; body + ; GP64: addiu $[[T1:[0-9]+]], $zero, 222 + ; GP64: sw $[[T1]], 508($23) + + ; epilogue + ; GP64: move $sp, $fp + ; GP64: ld $23, 1008($sp) + ; GP64: ld $fp, 1016($sp) + ; N32: addiu $sp, $sp, 1024 + ; N64: daddiu $sp, $sp, 1024 + + %a0 = alloca i32, i32 %sz, align 512 + %a1 = alloca i32, align 4 + + store volatile i32 111, i32* %a0, align 512 + store volatile i32 222, i32* %a1, align 4 + + ret void +} + +; Verify that we use $fp for referencing incoming arguments and $sp for +; building outbound arguments for nested function calls. + +; O32 ABI +define void @func_07(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %sz) { +entry: +; GP32-LABEL: func_07: + + ; body + ; FIXME: We are currently over-allocating stack space. 
+ ; GP32-DAG: lw $[[T0:[0-9]+]], 1040($fp) + ; + ; GP32-DAG: addiu $[[T1:[0-9]+]], $zero, 222 + ; GP32-DAG: sw $[[T1]], 508($23) + ; + ; GP32-DAG: sw $[[T2:[0-9]+]], 16($sp) + + %a0 = alloca i32, i32 %sz, align 512 + %a1 = alloca i32, align 4 + + store volatile i32 111, i32* %a0, align 512 + store volatile i32 222, i32* %a1, align 4 + + call void @helper_01(i32 0, i32 0, i32 0, i32 0, i32* %a1) + + ret void +} + +; N32/N64 ABIs +define void @func_08(i32 %p0, i32 %p1, i32 %p2, i32 %p3, + i32 %p4, i32 %p5, i32 %p6, i32 %p7, + i32 %sz) { +entry: +; GP64-LABEL: func_08: + + ; body + ; FIXME: We are currently over-allocating stack space. + ; N32-DAG: lw $[[T0:[0-9]+]], 1028($fp) + ; N64-DAG: lwu $[[T0:[0-9]+]], 1028($fp) + ; + ; GP64-DAG: addiu $[[T1:[0-9]+]], $zero, 222 + ; GP64-DAG: sw $[[T1]], 508($23) + ; + ; GP64-DAG: sd $[[T2:[0-9]+]], 0($sp) + + %a0 = alloca i32, i32 %sz, align 512 + %a1 = alloca i32, align 4 + + store volatile i32 111, i32* %a0, align 512 + store volatile i32 222, i32* %a1, align 4 + + call void @helper_02(i32 0, i32 0, i32 0, i32 0, + i32 0, i32 0, i32 0, i32 0, i32* %a1) + ret void +} + +; Check that we do not perform dynamic stack realignment in the presence of +; the "no-realign-stack" function attribute. +define void @func_09() "no-realign-stack" { +entry: +; ALL-LABEL: func_09: + + ; ALL-NOT: and $sp, $sp, $[[T0:[0-9]+|ra|gp]] + + %a = alloca i32, align 512 + call void @helper_01(i32 0, i32 0, i32 0, i32 0, i32* %a) + ret void +} + +define void @func_10(i32 %sz) "no-realign-stack" { +entry: +; ALL-LABEL: func_10: + + ; ALL-NOT: and $sp, $sp, $[[T0:[0-9]+|ra|gp]] + + %a0 = alloca i32, i32 %sz, align 512 + %a1 = alloca i32, align 4 + + store volatile i32 111, i32* %a0, align 512 + store volatile i32 222, i32* %a1, align 4 + + ret void +} diff --git a/test/CodeGen/NVPTX/add-128bit.ll b/test/CodeGen/NVPTX/add-128bit.ll index 529a5a9..d16e282 100644 --- a/test/CodeGen/NVPTX/add-128bit.ll +++ b/test/CodeGen/NVPTX/add-128bit.ll @@ -7,7 +7,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define void @foo(i64 %a, i64 %add, i128* %retptr) { ; CHECK: add.s64 ; CHECK: setp.lt.u64 -; CHECK: selp.b64 +; CHECK: selp.u64 ; CHECK: add.s64 %t1 = sext i64 %a to i128 %add2 = zext i64 %add to i128 diff --git a/test/CodeGen/X86/adde-carry.ll b/test/CodeGen/X86/adde-carry.ll new file mode 100644 index 0000000..f25df16 --- /dev/null +++ b/test/CodeGen/X86/adde-carry.ll @@ -0,0 +1,194 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s + +define void @a(i64* nocapture %s, i64* nocapture %t, i64 %a, i64 %b, i64 %c) nounwind { +; CHECK-LABEL: a: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addq %rcx, %rdx +; CHECK-NEXT: adcq $0, %r8 +; CHECK-NEXT: movq %r8, (%rdi) +; CHECK-NEXT: movq %rdx, (%rsi) +; CHECK-NEXT: retq +entry: + %0 = zext i64 %a to i128 + %1 = zext i64 %b to i128 + %2 = add i128 %1, %0 + %3 = zext i64 %c to i128 + %4 = shl i128 %3, 64 + %5 = add i128 %4, %2 + %6 = lshr i128 %5, 64 + %7 = trunc i128 %6 to i64 + store i64 %7, i64* %s, align 8 + %8 = trunc i128 %2 to i64 + store i64 %8, i64* %t, align 8 + ret void +} + +define void @b(i32* nocapture %r, i64 %a, i64 %b, i32 %c) nounwind { +; CHECK-LABEL: b: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addq %rdx, %rsi +; CHECK-NEXT: adcl $0, %ecx +; CHECK-NEXT: movl %ecx, (%rdi) +; CHECK-NEXT: retq +entry: + %0 = zext i64 %a to i128 + %1 = zext i64 %b to i128 + %2 = zext i32 %c to i128 + %3 = add i128 %1, %0 + %4 = 
lshr i128 %3, 64 + %5 = add i128 %4, %2 + %6 = trunc i128 %5 to i32 + store i32 %6, i32* %r, align 4 + ret void +} + +define void @c(i16* nocapture %r, i64 %a, i64 %b, i16 %c) nounwind { +; CHECK-LABEL: c: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addq %rdx, %rsi +; CHECK-NEXT: adcl $0, %ecx +; CHECK-NEXT: movw %cx, (%rdi) +; CHECK-NEXT: retq +entry: + %0 = zext i64 %a to i128 + %1 = zext i64 %b to i128 + %2 = zext i16 %c to i128 + %3 = add i128 %1, %0 + %4 = lshr i128 %3, 64 + %5 = add i128 %4, %2 + %6 = trunc i128 %5 to i16 + store i16 %6, i16* %r, align 4 + ret void +} + +define void @d(i8* nocapture %r, i64 %a, i64 %b, i8 %c) nounwind { +; CHECK-LABEL: d: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addq %rdx, %rsi +; CHECK-NEXT: adcl $0, %ecx +; CHECK-NEXT: movb %cl, (%rdi) +; CHECK-NEXT: retq +entry: + %0 = zext i64 %a to i128 + %1 = zext i64 %b to i128 + %2 = zext i8 %c to i128 + %3 = add i128 %1, %0 + %4 = lshr i128 %3, 64 + %5 = add i128 %4, %2 + %6 = trunc i128 %5 to i8 + store i8 %6, i8* %r, align 4 + ret void +} + +%scalar = type { [4 x i64] } + +define %scalar @pr31719(%scalar* nocapture readonly %this, %scalar %arg.b) { +; CHECK-LABEL: pr31719: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addq (%rsi), %rdx +; CHECK-NEXT: sbbq %r10, %r10 +; CHECK-NEXT: andl $1, %r10d +; CHECK-NEXT: addq 8(%rsi), %rcx +; CHECK-NEXT: sbbq %r11, %r11 +; CHECK-NEXT: andl $1, %r11d +; CHECK-NEXT: addq %r10, %rcx +; CHECK-NEXT: adcq $0, %r11 +; CHECK-NEXT: addq 16(%rsi), %r8 +; CHECK-NEXT: sbbq %rax, %rax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: addq %r11, %r8 +; CHECK-NEXT: adcq $0, %rax +; CHECK-NEXT: addq 24(%rsi), %r9 +; CHECK-NEXT: addq %rax, %r9 +; CHECK-NEXT: movq %rdx, (%rdi) +; CHECK-NEXT: movq %rcx, 8(%rdi) +; CHECK-NEXT: movq %r8, 16(%rdi) +; CHECK-NEXT: movq %r9, 24(%rdi) +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: retq +entry: + %0 = extractvalue %scalar %arg.b, 0 + %.elt = extractvalue [4 x i64] %0, 0 + %.elt24 = extractvalue [4 x i64] %0, 1 + %.elt26 = extractvalue [4 x i64] %0, 2 + %.elt28 = extractvalue [4 x i64] %0, 3 + %1 = getelementptr inbounds %scalar , %scalar* %this, i64 0, i32 0, i64 0 + %2 = load i64, i64* %1, align 8 + %3 = zext i64 %2 to i128 + %4 = zext i64 %.elt to i128 + %5 = add nuw nsw i128 %3, %4 + %6 = trunc i128 %5 to i64 + %7 = lshr i128 %5, 64 + %8 = getelementptr inbounds %scalar , %scalar * %this, i64 0, i32 0, i64 1 + %9 = load i64, i64* %8, align 8 + %10 = zext i64 %9 to i128 + %11 = zext i64 %.elt24 to i128 + %12 = add nuw nsw i128 %10, %11 + %13 = add nuw nsw i128 %12, %7 + %14 = trunc i128 %13 to i64 + %15 = lshr i128 %13, 64 + %16 = getelementptr inbounds %scalar , %scalar* %this, i64 0, i32 0, i64 2 + %17 = load i64, i64* %16, align 8 + %18 = zext i64 %17 to i128 + %19 = zext i64 %.elt26 to i128 + %20 = add nuw nsw i128 %18, %19 + %21 = add nuw nsw i128 %20, %15 + %22 = trunc i128 %21 to i64 + %23 = lshr i128 %21, 64 + %24 = getelementptr inbounds %scalar , %scalar* %this, i64 0, i32 0, i64 3 + %25 = load i64, i64* %24, align 8 + %26 = zext i64 %25 to i128 + %27 = zext i64 %.elt28 to i128 + %28 = add nuw nsw i128 %26, %27 + %29 = add nuw nsw i128 %28, %23 + %30 = trunc i128 %29 to i64 + %31 = insertvalue [4 x i64] undef, i64 %6, 0 + %32 = insertvalue [4 x i64] %31, i64 %14, 1 + %33 = insertvalue [4 x i64] %32, i64 %22, 2 + %34 = insertvalue [4 x i64] %33, i64 %30, 3 + %35 = insertvalue %scalar undef, [4 x i64] %34, 0 + ret %scalar %35 +} + +%accumulator= type { i64, i64, i32 } + +define void @muladd(%accumulator* nocapture %this, i64 %arg.a, i64 %arg.b) 
{ +; CHECK-LABEL: muladd: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: mulq %rsi +; CHECK-NEXT: addq (%rdi), %rax +; CHECK-NEXT: adcq $0, %rdx +; CHECK-NEXT: movq %rax, (%rdi) +; CHECK-NEXT: addq 8(%rdi), %rdx +; CHECK-NEXT: sbbl %eax, %eax +; CHECK-NEXT: movq %rdx, 8(%rdi) +; CHECK-NEXT: subl %eax, 16(%rdi) +; CHECK-NEXT: retq +entry: + %0 = zext i64 %arg.a to i128 + %1 = zext i64 %arg.b to i128 + %2 = mul nuw i128 %1, %0 + %3 = getelementptr inbounds %accumulator, %accumulator* %this, i64 0, i32 0 + %4 = load i64, i64* %3, align 8 + %5 = zext i64 %4 to i128 + %6 = add i128 %5, %2 + %7 = trunc i128 %6 to i64 + store i64 %7, i64* %3, align 8 + %8 = lshr i128 %6, 64 + %9 = getelementptr inbounds %accumulator, %accumulator* %this, i64 0, i32 1 + %10 = load i64, i64* %9, align 8 + %11 = zext i64 %10 to i128 + %12 = add nuw nsw i128 %8, %11 + %13 = trunc i128 %12 to i64 + store i64 %13, i64* %9, align 8 + %14 = lshr i128 %12, 64 + %15 = getelementptr inbounds %accumulator, %accumulator* %this, i64 0, i32 2 + %16 = load i32, i32* %15, align 4 + %17 = zext i32 %16 to i128 + %18 = add nuw nsw i128 %14, %17 + %19 = trunc i128 %18 to i32 + store i32 %19, i32* %15, align 4 + ret void +} diff --git a/test/CodeGen/X86/promote-vec3.ll b/test/CodeGen/X86/promote-vec3.ll new file mode 100644 index 0000000..75bfb4e --- /dev/null +++ b/test/CodeGen/X86/promote-vec3.ll @@ -0,0 +1,139 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefix=SSE3 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX-32 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX-32 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX-64 + +define <3 x i16> @zext_i8(<3 x i8>) { +; SSE3-LABEL: zext_i8: +; SSE3: # BB#0: +; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; SSE3-NEXT: movd %eax, %xmm0 +; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; SSE3-NEXT: pinsrw $1, %eax, %xmm0 +; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; SSE3-NEXT: pinsrw $2, %eax, %xmm0 +; SSE3-NEXT: pxor %xmm1, %xmm1 +; SSE3-NEXT: pextrw $1, %xmm0, %edx +; SSE3-NEXT: pextrw $2, %xmm0, %ecx +; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE3-NEXT: movd %xmm0, %eax +; SSE3-NEXT: # kill: %AX %AX %EAX +; SSE3-NEXT: # kill: %DX %DX %EDX +; SSE3-NEXT: # kill: %CX %CX %ECX +; SSE3-NEXT: retl +; +; SSE41-LABEL: zext_i8: +; SSE41: # BB#0: +; SSE41-NEXT: pxor %xmm0, %xmm0 +; SSE41-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0 +; SSE41-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0 +; SSE41-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0 +; SSE41-NEXT: movd %xmm0, %eax +; SSE41-NEXT: pextrw $2, %xmm0, %edx +; SSE41-NEXT: pextrw $4, %xmm0, %ecx +; SSE41-NEXT: # kill: %AX %AX %EAX +; SSE41-NEXT: # kill: %DX %DX %EDX +; SSE41-NEXT: # kill: %CX %CX %ECX +; SSE41-NEXT: retl +; +; AVX-32-LABEL: zext_i8: +; AVX-32: # BB#0: +; AVX-32-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; AVX-32-NEXT: vpinsrb $0, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; AVX-32-NEXT: vpinsrb $4, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; AVX-32-NEXT: vpinsrb $8, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; AVX-32-NEXT: vmovd %xmm0, %eax +; AVX-32-NEXT: vpextrw $2, %xmm0, %edx +; AVX-32-NEXT: vpextrw $4, %xmm0, %ecx +; AVX-32-NEXT: # kill: %AX %AX %EAX +; 
AVX-32-NEXT: # kill: %DX %DX %EDX +; AVX-32-NEXT: # kill: %CX %CX %ECX +; AVX-32-NEXT: retl +; +; AVX-64-LABEL: zext_i8: +; AVX-64: # BB#0: +; AVX-64-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; AVX-64-NEXT: vpinsrb $0, %edi, %xmm0, %xmm0 +; AVX-64-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 +; AVX-64-NEXT: vpinsrb $8, %edx, %xmm0, %xmm0 +; AVX-64-NEXT: vmovd %xmm0, %eax +; AVX-64-NEXT: vpextrw $2, %xmm0, %edx +; AVX-64-NEXT: vpextrw $4, %xmm0, %ecx +; AVX-64-NEXT: # kill: %AX %AX %EAX +; AVX-64-NEXT: # kill: %DX %DX %EDX +; AVX-64-NEXT: # kill: %CX %CX %ECX +; AVX-64-NEXT: retq + %2 = zext <3 x i8> %0 to <3 x i16> + ret <3 x i16> %2 +} + +define <3 x i16> @sext_i8(<3 x i8>) { +; SSE3-LABEL: sext_i8: +; SSE3: # BB#0: +; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; SSE3-NEXT: movd %eax, %xmm0 +; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; SSE3-NEXT: pinsrw $1, %eax, %xmm0 +; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; SSE3-NEXT: pinsrw $2, %eax, %xmm0 +; SSE3-NEXT: psllw $8, %xmm0 +; SSE3-NEXT: psraw $8, %xmm0 +; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSE3-NEXT: psrad $16, %xmm0 +; SSE3-NEXT: movd %xmm0, %eax +; SSE3-NEXT: pextrw $2, %xmm0, %edx +; SSE3-NEXT: pextrw $4, %xmm0, %ecx +; SSE3-NEXT: # kill: %AX %AX %EAX +; SSE3-NEXT: # kill: %DX %DX %EDX +; SSE3-NEXT: # kill: %CX %CX %ECX +; SSE3-NEXT: retl +; +; SSE41-LABEL: sext_i8: +; SSE41: # BB#0: +; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE41-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0 +; SSE41-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0 +; SSE41-NEXT: pslld $24, %xmm0 +; SSE41-NEXT: psrad $24, %xmm0 +; SSE41-NEXT: movd %xmm0, %eax +; SSE41-NEXT: pextrw $2, %xmm0, %edx +; SSE41-NEXT: pextrw $4, %xmm0, %ecx +; SSE41-NEXT: # kill: %AX %AX %EAX +; SSE41-NEXT: # kill: %DX %DX %EDX +; SSE41-NEXT: # kill: %CX %CX %ECX +; SSE41-NEXT: retl +; +; AVX-32-LABEL: sext_i8: +; AVX-32: # BB#0: +; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-32-NEXT: vpinsrb $4, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; AVX-32-NEXT: vpinsrb $8, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; AVX-32-NEXT: vpslld $24, %xmm0, %xmm0 +; AVX-32-NEXT: vpsrad $24, %xmm0, %xmm0 +; AVX-32-NEXT: vmovd %xmm0, %eax +; AVX-32-NEXT: vpextrw $2, %xmm0, %edx +; AVX-32-NEXT: vpextrw $4, %xmm0, %ecx +; AVX-32-NEXT: # kill: %AX %AX %EAX +; AVX-32-NEXT: # kill: %DX %DX %EDX +; AVX-32-NEXT: # kill: %CX %CX %ECX +; AVX-32-NEXT: retl +; +; AVX-64-LABEL: sext_i8: +; AVX-64: # BB#0: +; AVX-64-NEXT: vmovd %edi, %xmm0 +; AVX-64-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 +; AVX-64-NEXT: vpinsrb $8, %edx, %xmm0, %xmm0 +; AVX-64-NEXT: vpslld $24, %xmm0, %xmm0 +; AVX-64-NEXT: vpsrad $24, %xmm0, %xmm0 +; AVX-64-NEXT: vmovd %xmm0, %eax +; AVX-64-NEXT: vpextrw $2, %xmm0, %edx +; AVX-64-NEXT: vpextrw $4, %xmm0, %ecx +; AVX-64-NEXT: # kill: %AX %AX %EAX +; AVX-64-NEXT: # kill: %DX %DX %EDX +; AVX-64-NEXT: # kill: %CX %CX %ECX +; AVX-64-NEXT: retq + %2 = sext <3 x i8> %0 to <3 x i16> + ret <3 x i16> %2 +} diff --git a/test/CodeGen/X86/vselect-pcmp.ll b/test/CodeGen/X86/vselect-pcmp.ll new file mode 100644 index 0000000..d33fda4 --- /dev/null +++ b/test/CodeGen/X86/vselect-pcmp.ll @@ -0,0 +1,323 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX12F --check-prefix=AVX12 --check-prefix=AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX12F --check-prefix=AVX12 --check-prefix=AVX2 +; RUN: llc < 
%s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX12F --check-prefix=AVX512 --check-prefix=AVX512F +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512VL + +; The condition vector for BLENDV* only cares about the sign bit of each element. +; So in these tests, if we generate BLENDV*, we should be able to remove the redundant cmp op. + +; Test 128-bit vectors for all legal element types. + +; FIXME: Why didn't AVX-512 optimize too? + +define <16 x i8> @signbit_sel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) { +; AVX12-LABEL: signbit_sel_v16i8: +; AVX12: # BB#0: +; AVX12-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 +; AVX12-NEXT: retq +; +; AVX512-LABEL: signbit_sel_v16i8: +; AVX512: # BB#0: +; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2 +; AVX512-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: retq + %tr = icmp slt <16 x i8> %mask, zeroinitializer + %z = select <16 x i1> %tr, <16 x i8> %x, <16 x i8> %y + ret <16 x i8> %z +} + +; Sorry 16-bit, you're not important enough to support? + +define <8 x i16> @signbit_sel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) { +; AVX-LABEL: signbit_sel_v8i16: +; AVX: # BB#0: +; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2 +; AVX-NEXT: vpandn %xmm1, %xmm2, %xmm1 +; AVX-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq + %tr = icmp slt <8 x i16> %mask, zeroinitializer + %z = select <8 x i1> %tr, <8 x i16> %x, <8 x i16> %y + ret <8 x i16> %z +} + +define <4 x i32> @signbit_sel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) { +; AVX12F-LABEL: signbit_sel_v4i32: +; AVX12F: # BB#0: +; AVX12F-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; AVX12F-NEXT: retq +; +; AVX512VL-LABEL: signbit_sel_v4i32: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512VL-NEXT: vpcmpgtd %xmm2, %xmm3, %k1 +; AVX512VL-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1} +; AVX512VL-NEXT: retq + %tr = icmp slt <4 x i32> %mask, zeroinitializer + %z = select <4 x i1> %tr, <4 x i32> %x, <4 x i32> %y + ret <4 x i32> %z +} + +define <2 x i64> @signbit_sel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) { +; AVX12F-LABEL: signbit_sel_v2i64: +; AVX12F: # BB#0: +; AVX12F-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX12F-NEXT: retq +; +; AVX512VL-LABEL: signbit_sel_v2i64: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512VL-NEXT: vpcmpgtq %xmm2, %xmm3, %k1 +; AVX512VL-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1} +; AVX512VL-NEXT: retq + %tr = icmp slt <2 x i64> %mask, zeroinitializer + %z = select <2 x i1> %tr, <2 x i64> %x, <2 x i64> %y + ret <2 x i64> %z +} + +define <4 x float> @signbit_sel_v4f32(<4 x float> %x, <4 x float> %y, <4 x i32> %mask) { +; AVX12F-LABEL: signbit_sel_v4f32: +; AVX12F: # BB#0: +; AVX12F-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; AVX12F-NEXT: retq +; +; AVX512VL-LABEL: signbit_sel_v4f32: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512VL-NEXT: vpcmpgtd %xmm2, %xmm3, %k1 +; AVX512VL-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} +; AVX512VL-NEXT: retq + %tr = icmp slt <4 x i32> %mask, zeroinitializer + %z = select <4 x i1> %tr, <4 x float> %x, <4 x float> %y + ret <4 x float> %z +} + +define <2 x double> @signbit_sel_v2f64(<2 x double> %x, <2 x double> %y, <2 x i64> %mask) { +; AVX12F-LABEL: signbit_sel_v2f64: +; AVX12F: # BB#0: +; 
AVX12F-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX12F-NEXT: retq +; +; AVX512VL-LABEL: signbit_sel_v2f64: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512VL-NEXT: vpcmpgtq %xmm2, %xmm3, %k1 +; AVX512VL-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} +; AVX512VL-NEXT: retq + %tr = icmp slt <2 x i64> %mask, zeroinitializer + %z = select <2 x i1> %tr, <2 x double> %x, <2 x double> %y + ret <2 x double> %z +} + +; Test 256-bit vectors to see differences between AVX1 and AVX2. + +define <32 x i8> @signbit_sel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %mask) { +; AVX1-LABEL: signbit_sel_v32i8: +; AVX1: # BB#0: +; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3 +; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX1-NEXT: vpcmpgtb %xmm3, %xmm4, %xmm3 +; AVX1-NEXT: vpcmpgtb %xmm2, %xmm4, %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 +; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1 +; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: signbit_sel_v32i8: +; AVX2: # BB#0: +; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: retq +; +; AVX512-LABEL: signbit_sel_v32i8: +; AVX512: # BB#0: +; AVX512-NEXT: vpxor %ymm3, %ymm3, %ymm3 +; AVX512-NEXT: vpcmpgtb %ymm2, %ymm3, %ymm2 +; AVX512-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 +; AVX512-NEXT: retq + %tr = icmp slt <32 x i8> %mask, zeroinitializer + %z = select <32 x i1> %tr, <32 x i8> %x, <32 x i8> %y + ret <32 x i8> %z +} + +; Sorry 16-bit, you'll never be important enough to support? + +define <16 x i16> @signbit_sel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %mask) { +; AVX1-LABEL: signbit_sel_v16i16: +; AVX1: # BB#0: +; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3 +; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX1-NEXT: vpcmpgtw %xmm3, %xmm4, %xmm3 +; AVX1-NEXT: vpcmpgtw %xmm2, %xmm4, %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 +; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1 +; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: signbit_sel_v16i16: +; AVX2: # BB#0: +; AVX2-NEXT: vpxor %ymm3, %ymm3, %ymm3 +; AVX2-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2 +; AVX2-NEXT: vpandn %ymm1, %ymm2, %ymm1 +; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: retq +; +; AVX512-LABEL: signbit_sel_v16i16: +; AVX512: # BB#0: +; AVX512-NEXT: vpxor %ymm3, %ymm3, %ymm3 +; AVX512-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2 +; AVX512-NEXT: vpandn %ymm1, %ymm2, %ymm1 +; AVX512-NEXT: vpand %ymm2, %ymm0, %ymm0 +; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: retq + %tr = icmp slt <16 x i16> %mask, zeroinitializer + %z = select <16 x i1> %tr, <16 x i16> %x, <16 x i16> %y + ret <16 x i16> %z +} + +define <8 x i32> @signbit_sel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %mask) { +; AVX12-LABEL: signbit_sel_v8i32: +; AVX12: # BB#0: +; AVX12-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 +; AVX12-NEXT: retq +; +; AVX512F-LABEL: signbit_sel_v8i32: +; AVX512F: # BB#0: +; AVX512F-NEXT: # kill: %YMM2 %YMM2 %ZMM2 +; AVX512F-NEXT: # kill: %YMM1 %YMM1 %ZMM1 +; AVX512F-NEXT: # kill: %YMM0 %YMM0 %ZMM0 +; AVX512F-NEXT: vpxor %ymm3, %ymm3, %ymm3 +; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm3, %k1 +; AVX512F-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; AVX512F-NEXT: # kill: %YMM0 %YMM0 %ZMM0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: signbit_sel_v8i32: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vpxor %ymm3, %ymm3, %ymm3 +; AVX512VL-NEXT: vpcmpgtd %ymm2, %ymm3, %k1 +; AVX512VL-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} 
+; AVX512VL-NEXT: retq + %tr = icmp slt <8 x i32> %mask, zeroinitializer + %z = select <8 x i1> %tr, <8 x i32> %x, <8 x i32> %y + ret <8 x i32> %z +} + +define <4 x i64> @signbit_sel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %mask) { +; AVX12F-LABEL: signbit_sel_v4i64: +; AVX12F: # BB#0: +; AVX12F-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX12F-NEXT: retq +; +; AVX512VL-LABEL: signbit_sel_v4i64: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vpxor %ymm3, %ymm3, %ymm3 +; AVX512VL-NEXT: vpcmpgtq %ymm2, %ymm3, %k1 +; AVX512VL-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} +; AVX512VL-NEXT: retq + %tr = icmp slt <4 x i64> %mask, zeroinitializer + %z = select <4 x i1> %tr, <4 x i64> %x, <4 x i64> %y + ret <4 x i64> %z +} + +define <4 x double> @signbit_sel_v4f64(<4 x double> %x, <4 x double> %y, <4 x i64> %mask) { +; AVX12F-LABEL: signbit_sel_v4f64: +; AVX12F: # BB#0: +; AVX12F-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX12F-NEXT: retq +; +; AVX512VL-LABEL: signbit_sel_v4f64: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vpxor %ymm3, %ymm3, %ymm3 +; AVX512VL-NEXT: vpcmpgtq %ymm2, %ymm3, %k1 +; AVX512VL-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} +; AVX512VL-NEXT: retq + %tr = icmp slt <4 x i64> %mask, zeroinitializer + %z = select <4 x i1> %tr, <4 x double> %x, <4 x double> %y + ret <4 x double> %z +} + +; Try a condition with a different type than the select operands. + +define <4 x double> @signbit_sel_v4f64_small_mask(<4 x double> %x, <4 x double> %y, <4 x i32> %mask) { +; AVX1-LABEL: signbit_sel_v4f64_small_mask: +; AVX1: # BB#0: +; AVX1-NEXT: vpmovsxdq %xmm2, %xmm3 +; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1] +; AVX1-NEXT: vpmovsxdq %xmm2, %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 +; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: signbit_sel_v4f64_small_mask: +; AVX2: # BB#0: +; AVX2-NEXT: vpmovsxdq %xmm2, %ymm2 +; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: retq +; +; AVX512F-LABEL: signbit_sel_v4f64_small_mask: +; AVX512F: # BB#0: +; AVX512F-NEXT: vpmovsxdq %xmm2, %ymm2 +; AVX512F-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: signbit_sel_v4f64_small_mask: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512VL-NEXT: vpcmpgtd %xmm2, %xmm3, %k1 +; AVX512VL-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} +; AVX512VL-NEXT: retq + %tr = icmp slt <4 x i32> %mask, zeroinitializer + %z = select <4 x i1> %tr, <4 x double> %x, <4 x double> %y + ret <4 x double> %z +} + +; Try a 512-bit vector to make sure AVX-512 is handled as expected. + +define <8 x double> @signbit_sel_v8f64(<8 x double> %x, <8 x double> %y, <8 x i64> %mask) { +; AVX12-LABEL: signbit_sel_v8f64: +; AVX12: # BB#0: +; AVX12-NEXT: vblendvpd %ymm4, %ymm0, %ymm2, %ymm0 +; AVX12-NEXT: vblendvpd %ymm5, %ymm1, %ymm3, %ymm1 +; AVX12-NEXT: retq +; +; AVX512-LABEL: signbit_sel_v8f64: +; AVX512: # BB#0: +; AVX512-NEXT: vpxord %zmm3, %zmm3, %zmm3 +; AVX512-NEXT: vpcmpgtq %zmm2, %zmm3, %k1 +; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} +; AVX512-NEXT: retq + %tr = icmp slt <8 x i64> %mask, zeroinitializer + %z = select <8 x i1> %tr, <8 x double> %x, <8 x double> %y + ret <8 x double> %z +} + +; If we have a floating-point compare: +; (1) Don't die. +; (2) FIXME: If we don't care about signed-zero (and NaN?), the compare should still get folded. 
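; Before the floating-point variant below, a scalar model of what the integer
; blend tests above expect. This is an illustrative C++ sketch, not LLVM
; source: vblendv*-style instructions consume only the sign bit of each mask
; element, so a compare-with-zero feeding them carries no extra information.

    // Per-element model of a sign-bit blend, assuming 32-bit elements.
    // The explicit "mask < 0" compare adds nothing beyond the sign bit,
    // which is why a pcmpgt in front of a blendv is redundant.
    #include <cstdint>

    int32_t blend_on_signbit(int32_t mask, int32_t x, int32_t y) {
      return mask < 0 ? x : y; // same as testing (mask >> 31) & 1
    }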
+ +define <4 x float> @signbit_sel_v4f32_fcmp(<4 x float> %x, <4 x float> %y, <4 x float> %mask) #0 { +; AVX12F-LABEL: signbit_sel_v4f32_fcmp: +; AVX12F: # BB#0: +; AVX12F-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; AVX12F-NEXT: vcmpltps %xmm2, %xmm0, %xmm2 +; AVX12F-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; AVX12F-NEXT: retq +; +; AVX512VL-LABEL: signbit_sel_v4f32_fcmp: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512VL-NEXT: vcmpltps %xmm2, %xmm0, %k1 +; AVX512VL-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} +; AVX512VL-NEXT: retq + %cmp = fcmp olt <4 x float> %x, zeroinitializer + %sel = select <4 x i1> %cmp, <4 x float> %x, <4 x float> %y + ret <4 x float> %sel +} + +attributes #0 = { "no-nans-fp-math"="true" } diff --git a/test/CodeGen/X86/widen_bitops-0.ll b/test/CodeGen/X86/widen_bitops-0.ll new file mode 100644 index 0000000..b0d5e14 --- /dev/null +++ b/test/CodeGen/X86/widen_bitops-0.ll @@ -0,0 +1,307 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X32-SSE --check-prefix=X32-SSE42 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X64-SSE --check-prefix=X64-SSE42 + +; +; AND/XOR/OR i24 as v3i8 +; + +define i24 @and_i24_as_v3i8(i24 %a, i24 %b) nounwind { +; X32-SSE-LABEL: and_i24_as_v3i8: +; X32-SSE: # BB#0: +; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-SSE-NEXT: andl {{[0-9]+}}(%esp), %eax +; X32-SSE-NEXT: retl +; +; X64-SSE-LABEL: and_i24_as_v3i8: +; X64-SSE: # BB#0: +; X64-SSE-NEXT: andl %esi, %edi +; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: retq + %1 = bitcast i24 %a to <3 x i8> + %2 = bitcast i24 %b to <3 x i8> + %3 = and <3 x i8> %1, %2 + %4 = bitcast <3 x i8> %3 to i24 + ret i24 %4 +} + +define i24 @xor_i24_as_v3i8(i24 %a, i24 %b) nounwind { +; X32-SSE-LABEL: xor_i24_as_v3i8: +; X32-SSE: # BB#0: +; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-SSE-NEXT: xorl {{[0-9]+}}(%esp), %eax +; X32-SSE-NEXT: retl +; +; X64-SSE-LABEL: xor_i24_as_v3i8: +; X64-SSE: # BB#0: +; X64-SSE-NEXT: xorl %esi, %edi +; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: retq + %1 = bitcast i24 %a to <3 x i8> + %2 = bitcast i24 %b to <3 x i8> + %3 = xor <3 x i8> %1, %2 + %4 = bitcast <3 x i8> %3 to i24 + ret i24 %4 +} + +define i24 @or_i24_as_v3i8(i24 %a, i24 %b) nounwind { +; X32-SSE-LABEL: or_i24_as_v3i8: +; X32-SSE: # BB#0: +; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-SSE-NEXT: orl {{[0-9]+}}(%esp), %eax +; X32-SSE-NEXT: retl +; +; X64-SSE-LABEL: or_i24_as_v3i8: +; X64-SSE: # BB#0: +; X64-SSE-NEXT: orl %esi, %edi +; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: retq + %1 = bitcast i24 %a to <3 x i8> + %2 = bitcast i24 %b to <3 x i8> + %3 = or <3 x i8> %1, %2 + %4 = bitcast <3 x i8> %3 to i24 + ret i24 %4 +} + +; +; AND/XOR/OR i24 as v8i3 +; + +define i24 @and_i24_as_v8i3(i24 %a, i24 %b) nounwind { +; X32-SSE-LABEL: and_i24_as_v8i3: +; X32-SSE: # BB#0: +; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-SSE-NEXT: andl {{[0-9]+}}(%esp), %eax +; X32-SSE-NEXT: retl +; +; X64-SSE-LABEL: and_i24_as_v8i3: +; X64-SSE: # BB#0: +; X64-SSE-NEXT: andl %esi, %edi +; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: retq + %1 = bitcast i24 %a to <8 x i3> + %2 = bitcast i24 %b to <8 x i3> + %3 = and <8 x i3> %1, %2 + %4 = bitcast <8 x i3> %3 to i24 + ret i24 %4 +} + +define i24 @xor_i24_as_v8i3(i24 %a, i24 %b) nounwind { +; X32-SSE-LABEL: xor_i24_as_v8i3: +; X32-SSE: # BB#0: +; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; 
X32-SSE-NEXT: xorl {{[0-9]+}}(%esp), %eax +; X32-SSE-NEXT: retl +; +; X64-SSE-LABEL: xor_i24_as_v8i3: +; X64-SSE: # BB#0: +; X64-SSE-NEXT: xorl %esi, %edi +; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: retq + %1 = bitcast i24 %a to <8 x i3> + %2 = bitcast i24 %b to <8 x i3> + %3 = xor <8 x i3> %1, %2 + %4 = bitcast <8 x i3> %3 to i24 + ret i24 %4 +} + +define i24 @or_i24_as_v8i3(i24 %a, i24 %b) nounwind { +; X32-SSE-LABEL: or_i24_as_v8i3: +; X32-SSE: # BB#0: +; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-SSE-NEXT: orl {{[0-9]+}}(%esp), %eax +; X32-SSE-NEXT: retl +; +; X64-SSE-LABEL: or_i24_as_v8i3: +; X64-SSE: # BB#0: +; X64-SSE-NEXT: orl %esi, %edi +; X64-SSE-NEXT: movl %edi, %eax +; X64-SSE-NEXT: retq + %1 = bitcast i24 %a to <8 x i3> + %2 = bitcast i24 %b to <8 x i3> + %3 = or <8 x i3> %1, %2 + %4 = bitcast <8 x i3> %3 to i24 + ret i24 %4 +} + +; +; AND/XOR/OR v3i8 as i24 +; + +define <3 x i8> @and_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind { +; X32-SSE-LABEL: and_v3i8_as_i24: +; X32-SSE: # BB#0: +; X32-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0 +; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0 +; X32-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm1 +; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm1 +; X32-SSE-NEXT: pand %xmm0, %xmm1 +; X32-SSE-NEXT: pextrb $0, %xmm1, %eax +; X32-SSE-NEXT: pextrb $4, %xmm1, %edx +; X32-SSE-NEXT: pextrb $8, %xmm1, %ecx +; X32-SSE-NEXT: # kill: %AL %AL %EAX +; X32-SSE-NEXT: # kill: %DL %DL %EDX +; X32-SSE-NEXT: # kill: %CL %CL %ECX +; X32-SSE-NEXT: retl +; +; X64-SSE-LABEL: and_v3i8_as_i24: +; X64-SSE: # BB#0: +; X64-SSE-NEXT: movd %ecx, %xmm0 +; X64-SSE-NEXT: pinsrb $4, %r8d, %xmm0 +; X64-SSE-NEXT: pinsrb $8, %r9d, %xmm0 +; X64-SSE-NEXT: movd %edi, %xmm1 +; X64-SSE-NEXT: pinsrb $4, %esi, %xmm1 +; X64-SSE-NEXT: pinsrb $8, %edx, %xmm1 +; X64-SSE-NEXT: pand %xmm0, %xmm1 +; X64-SSE-NEXT: pextrb $0, %xmm1, %eax +; X64-SSE-NEXT: pextrb $4, %xmm1, %edx +; X64-SSE-NEXT: pextrb $8, %xmm1, %ecx +; X64-SSE-NEXT: # kill: %AL %AL %EAX +; X64-SSE-NEXT: # kill: %DL %DL %EDX +; X64-SSE-NEXT: # kill: %CL %CL %ECX +; X64-SSE-NEXT: retq + %1 = bitcast <3 x i8> %a to i24 + %2 = bitcast <3 x i8> %b to i24 + %3 = and i24 %1, %2 + %4 = bitcast i24 %3 to <3 x i8> + ret <3 x i8> %4 +} + +define <3 x i8> @xor_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind { +; X32-SSE-LABEL: xor_v3i8_as_i24: +; X32-SSE: # BB#0: +; X32-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0 +; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0 +; X32-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm1 +; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm1 +; X32-SSE-NEXT: pxor %xmm0, %xmm1 +; X32-SSE-NEXT: pextrb $0, %xmm1, %eax +; X32-SSE-NEXT: pextrb $4, %xmm1, %edx +; X32-SSE-NEXT: pextrb $8, %xmm1, %ecx +; X32-SSE-NEXT: # kill: %AL %AL %EAX +; X32-SSE-NEXT: # kill: %DL %DL %EDX +; X32-SSE-NEXT: # kill: %CL %CL %ECX +; X32-SSE-NEXT: retl +; +; X64-SSE-LABEL: xor_v3i8_as_i24: +; X64-SSE: # BB#0: +; X64-SSE-NEXT: movd %ecx, %xmm0 +; X64-SSE-NEXT: pinsrb $4, %r8d, %xmm0 +; X64-SSE-NEXT: pinsrb $8, %r9d, %xmm0 +; X64-SSE-NEXT: movd %edi, %xmm1 +; X64-SSE-NEXT: pinsrb $4, %esi, %xmm1 +; X64-SSE-NEXT: pinsrb $8, %edx, %xmm1 +; X64-SSE-NEXT: pxor %xmm0, %xmm1 +; X64-SSE-NEXT: pextrb $0, %xmm1, %eax +; X64-SSE-NEXT: pextrb $4, %xmm1, %edx +; X64-SSE-NEXT: pextrb $8, %xmm1, 
%ecx +; X64-SSE-NEXT: # kill: %AL %AL %EAX +; X64-SSE-NEXT: # kill: %DL %DL %EDX +; X64-SSE-NEXT: # kill: %CL %CL %ECX +; X64-SSE-NEXT: retq + %1 = bitcast <3 x i8> %a to i24 + %2 = bitcast <3 x i8> %b to i24 + %3 = xor i24 %1, %2 + %4 = bitcast i24 %3 to <3 x i8> + ret <3 x i8> %4 +} + +define <3 x i8> @or_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind { +; X32-SSE-LABEL: or_v3i8_as_i24: +; X32-SSE: # BB#0: +; X32-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0 +; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0 +; X32-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm1 +; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm1 +; X32-SSE-NEXT: por %xmm0, %xmm1 +; X32-SSE-NEXT: pextrb $0, %xmm1, %eax +; X32-SSE-NEXT: pextrb $4, %xmm1, %edx +; X32-SSE-NEXT: pextrb $8, %xmm1, %ecx +; X32-SSE-NEXT: # kill: %AL %AL %EAX +; X32-SSE-NEXT: # kill: %DL %DL %EDX +; X32-SSE-NEXT: # kill: %CL %CL %ECX +; X32-SSE-NEXT: retl +; +; X64-SSE-LABEL: or_v3i8_as_i24: +; X64-SSE: # BB#0: +; X64-SSE-NEXT: movd %ecx, %xmm0 +; X64-SSE-NEXT: pinsrb $4, %r8d, %xmm0 +; X64-SSE-NEXT: pinsrb $8, %r9d, %xmm0 +; X64-SSE-NEXT: movd %edi, %xmm1 +; X64-SSE-NEXT: pinsrb $4, %esi, %xmm1 +; X64-SSE-NEXT: pinsrb $8, %edx, %xmm1 +; X64-SSE-NEXT: por %xmm0, %xmm1 +; X64-SSE-NEXT: pextrb $0, %xmm1, %eax +; X64-SSE-NEXT: pextrb $4, %xmm1, %edx +; X64-SSE-NEXT: pextrb $8, %xmm1, %ecx +; X64-SSE-NEXT: # kill: %AL %AL %EAX +; X64-SSE-NEXT: # kill: %DL %DL %EDX +; X64-SSE-NEXT: # kill: %CL %CL %ECX +; X64-SSE-NEXT: retq + %1 = bitcast <3 x i8> %a to i24 + %2 = bitcast <3 x i8> %b to i24 + %3 = or i24 %1, %2 + %4 = bitcast i24 %3 to <3 x i8> + ret <3 x i8> %4 +} + +; +; AND/XOR/OR v8i3 as i24 +; + +define <8 x i3> @and_v8i3_as_i24(<8 x i3> %a, <8 x i3> %b) nounwind { +; X32-SSE-LABEL: and_v8i3_as_i24: +; X32-SSE: # BB#0: +; X32-SSE-NEXT: andps %xmm1, %xmm0 +; X32-SSE-NEXT: retl +; +; X64-SSE-LABEL: and_v8i3_as_i24: +; X64-SSE: # BB#0: +; X64-SSE-NEXT: andps %xmm1, %xmm0 +; X64-SSE-NEXT: retq + %1 = bitcast <8 x i3> %a to i24 + %2 = bitcast <8 x i3> %b to i24 + %3 = and i24 %1, %2 + %4 = bitcast i24 %3 to <8 x i3> + ret <8 x i3> %4 +} + +define <8 x i3> @xor_v8i3_as_i24(<8 x i3> %a, <8 x i3> %b) nounwind { +; X32-SSE-LABEL: xor_v8i3_as_i24: +; X32-SSE: # BB#0: +; X32-SSE-NEXT: xorps %xmm1, %xmm0 +; X32-SSE-NEXT: retl +; +; X64-SSE-LABEL: xor_v8i3_as_i24: +; X64-SSE: # BB#0: +; X64-SSE-NEXT: xorps %xmm1, %xmm0 +; X64-SSE-NEXT: retq + %1 = bitcast <8 x i3> %a to i24 + %2 = bitcast <8 x i3> %b to i24 + %3 = xor i24 %1, %2 + %4 = bitcast i24 %3 to <8 x i3> + ret <8 x i3> %4 +} + +define <8 x i3> @or_v8i3_as_i24(<8 x i3> %a, <8 x i3> %b) nounwind { +; X32-SSE-LABEL: or_v8i3_as_i24: +; X32-SSE: # BB#0: +; X32-SSE-NEXT: orps %xmm1, %xmm0 +; X32-SSE-NEXT: retl +; +; X64-SSE-LABEL: or_v8i3_as_i24: +; X64-SSE: # BB#0: +; X64-SSE-NEXT: orps %xmm1, %xmm0 +; X64-SSE-NEXT: retq + %1 = bitcast <8 x i3> %a to i24 + %2 = bitcast <8 x i3> %b to i24 + %3 = or i24 %1, %2 + %4 = bitcast i24 %3 to <8 x i3> + ret <8 x i3> %4 +} diff --git a/tools/clang/include/clang/AST/ASTContext.h b/tools/clang/include/clang/AST/ASTContext.h index 07a8937..c485c6e 100644 --- a/tools/clang/include/clang/AST/ASTContext.h +++ b/tools/clang/include/clang/AST/ASTContext.h @@ -66,6 +66,7 @@ #include #include #include +#include #include #include @@ -167,6 +168,8 @@ class ASTContext : public RefCountedBase { mutable llvm::FoldingSet DependentUnaryTransformTypes; mutable 
llvm::FoldingSet<AutoType> AutoTypes; + mutable llvm::FoldingSet<DeducedTemplateSpecializationType> + DeducedTemplateSpecializationTypes; mutable llvm::FoldingSet<AtomicType> AtomicTypes; llvm::FoldingSet<AttributedType> AttributedTypes; mutable llvm::FoldingSet<PipeType> PipeTypes; @@ -404,11 +407,11 @@ class ASTContext : public RefCountedBase<ASTContext> { llvm::DenseMap<const VarDecl *, MemberSpecializationInfo *> TemplateOrInstantiation; - /// \brief Keeps track of the declaration from which a UsingDecl was + /// \brief Keeps track of the declaration from which a using declaration was /// created during instantiation. /// - /// The source declaration is always a UsingDecl, an UnresolvedUsingValueDecl, - /// or an UnresolvedUsingTypenameDecl. + /// The source and target declarations are always a UsingDecl, an + /// UnresolvedUsingValueDecl, or an UnresolvedUsingTypenameDecl. /// /// For example: /// \code @@ -427,7 +430,7 @@ class ASTContext : public RefCountedBase<ASTContext> { /// /// This mapping will contain an entry that maps from the UsingDecl in /// B to the UnresolvedUsingDecl in B. - llvm::DenseMap<UsingDecl *, NamedDecl *> InstantiatedFromUsingDecl; + llvm::DenseMap<NamedDecl *, NamedDecl *> InstantiatedFromUsingDecl; llvm::DenseMap<UsingShadowDecl *, UsingShadowDecl *> InstantiatedFromUsingShadowDecl; @@ -855,11 +858,11 @@ class ASTContext : public RefCountedBase<ASTContext> { /// \brief If the given using decl \p Inst is an instantiation of a /// (possibly unresolved) using decl from a template instantiation, /// return it. - NamedDecl *getInstantiatedFromUsingDecl(UsingDecl *Inst); + NamedDecl *getInstantiatedFromUsingDecl(NamedDecl *Inst); /// \brief Remember that the using decl \p Inst is an instantiation /// of the using decl \p Pattern of a class template. - void setInstantiatedFromUsingDecl(UsingDecl *Inst, NamedDecl *Pattern); + void setInstantiatedFromUsingDecl(NamedDecl *Inst, NamedDecl *Pattern); void setInstantiatedFromUsingShadowDecl(UsingShadowDecl *Inst, UsingShadowDecl *Pattern); @@ -979,7 +982,7 @@ class ASTContext : public RefCountedBase<ASTContext> { CanQualType SingletonId; #include "clang/Basic/OpenCLImageTypes.def" CanQualType OCLSamplerTy, OCLEventTy, OCLClkEventTy; - CanQualType OCLQueueTy, OCLNDRangeTy, OCLReserveIDTy; + CanQualType OCLQueueTy, OCLReserveIDTy; CanQualType OMPArraySectionTy; // Types for deductions in C++0x [stmt.ranged]'s desugaring. Built on demand. @@ -1367,6 +1370,14 @@ class ASTContext : public RefCountedBase<ASTContext> { ElaboratedTypeKeyword Keyword, NestedNameSpecifier *NNS, const IdentifierInfo *Name, ArrayRef<TemplateArgument> Args) const; + TemplateArgument getInjectedTemplateArg(NamedDecl *ParamDecl); + + /// Get a template argument list with one argument per template parameter + /// in a template parameter list, such as for the injected class name of + /// a class template. + void getInjectedTemplateArgs(const TemplateParameterList *Params, + SmallVectorImpl<TemplateArgument> &Args); + QualType getPackExpansionType(QualType Pattern, Optional<unsigned> NumExpansions); @@ -1418,6 +1429,11 @@ class ASTContext : public RefCountedBase<ASTContext> { /// \brief C++11 deduction pattern for 'auto &&' type. QualType getAutoRRefDeductType() const; + /// \brief C++1z deduced class template specialization type. + QualType getDeducedTemplateSpecializationType(TemplateName Template, + QualType DeducedType, + bool IsDependent) const; + /// \brief Return the unique reference to the type for the specified TagDecl /// (struct/union/class/enum) decl. QualType getTagDeclType(const TagDecl *Decl) const; @@ -2484,6 +2500,16 @@ class ASTContext : public RefCountedBase<ASTContext> { /// when it is called. void AddDeallocation(void (*Callback)(void*), void *Data); + /// If T isn't trivially destructible, calls AddDeallocation to register it + /// for destruction.
+ template <typename T> + void addDestruction(T *Ptr) { + if (!std::is_trivially_destructible<T>::value) { + auto DestroyPtr = [](void *V) { static_cast<T *>(V)->~T(); }; + AddDeallocation(DestroyPtr, Ptr); + } + } + GVALinkage GetGVALinkageForFunction(const FunctionDecl *FD) const; GVALinkage GetGVALinkageForVariable(const VarDecl *VD); @@ -2493,7 +2519,7 @@ class ASTContext : public RefCountedBase<ASTContext> { /// /// \returns true if the function/var must be CodeGen'ed/deserialized even if /// it is not used. - bool DeclMustBeEmitted(const Decl *D); + bool DeclMustBeEmitted(const Decl *D, bool ForModularCodegen = false); const CXXConstructorDecl * getCopyConstructorForExceptionObject(CXXRecordDecl *RD); diff --git a/tools/clang/include/clang/AST/BuiltinTypes.def b/tools/clang/include/clang/AST/BuiltinTypes.def index 7b18493..86800d9 100644 --- a/tools/clang/include/clang/AST/BuiltinTypes.def +++ b/tools/clang/include/clang/AST/BuiltinTypes.def @@ -175,9 +175,6 @@ BUILTIN_TYPE(OCLClkEvent, OCLClkEventTy) // OpenCL queue_t. BUILTIN_TYPE(OCLQueue, OCLQueueTy) -// OpenCL ndrange_t. -BUILTIN_TYPE(OCLNDRange, OCLNDRangeTy) - // OpenCL reserve_id_t. BUILTIN_TYPE(OCLReserveID, OCLReserveIDTy) diff --git a/tools/clang/include/clang/AST/TypeLoc.h b/tools/clang/include/clang/AST/TypeLoc.h index 147439b..b1e274f 100644 --- a/tools/clang/include/clang/AST/TypeLoc.h +++ b/tools/clang/include/clang/AST/TypeLoc.h @@ -70,6 +70,13 @@ class TypeLoc { return t; } + /// \brief Convert to the specified TypeLoc type, returning a null TypeLoc if + /// this TypeLoc is not of the desired type. It will consider type + /// adjustments from a type that was written as a T to another type that is + /// still canonically a T (ignores parens, attributes, elaborated types, etc). + template <class T> + T getAsAdjusted() const; + /// The kinds of TypeLocs. Equivalent to the Type::TypeClass enum, /// except it also defines a Qualified enum that corresponds to the /// QualifiedLoc class. @@ -1351,6 +1358,19 @@ class FunctionTypeLoc : public ConcreteTypeLoc<UnqualTypeLoc, FunctionTypeLoc, FunctionType, FunctionLocInfo> { + bool hasExceptionSpec() const { + if (auto *FPT = dyn_cast<FunctionProtoType>(getTypePtr())) { + return FPT->hasExceptionSpec(); + } + return false; + } + + SourceRange *getExceptionSpecRangePtr() const { + assert(hasExceptionSpec() && "No exception spec range"); + // After the Info comes the ParmVarDecl array, and after that comes the + // exception specification information.
+ return (SourceRange *)(getParmArray() + getNumParams()); + } public: SourceLocation getLocalRangeBegin() const { return getLocalData()->LocalRangeBegin; @@ -1384,6 +1404,16 @@ class FunctionTypeLoc : public ConcreteTypeLoc ... ArrayRef<ParmVarDecl *> getParams() const { return llvm::makeArrayRef(getParmArray(), getNumParams()); } @@ -1416,12 +1446,15 @@ class FunctionTypeLoc : public ConcreteTypeLoc { +class DeducedTypeLoc + : public InheritingConcreteTypeLoc<TypeSpecTypeLoc, DeducedTypeLoc, DeducedType> {}; + +class AutoTypeLoc + : public InheritingConcreteTypeLoc<DeducedTypeLoc, AutoTypeLoc, AutoType> { +}; + +class DeducedTemplateSpecializationTypeLoc + : public InheritingConcreteTypeLoc<DeducedTypeLoc, DeducedTemplateSpecializationTypeLoc, DeducedTemplateSpecializationType> { +public: + SourceLocation getTemplateNameLoc() const { + return getNameLoc(); + } + void setTemplateNameLoc(SourceLocation Loc) { + setNameLoc(Loc); + } }; struct ElaboratedLocInfo { @@ -2146,6 +2195,24 @@ class PipeTypeLoc : public ConcreteTypeLoc ... getTypePtr()->getElementType(); } }; + +template <class T> +inline T TypeLoc::getAsAdjusted() const { + TypeLoc Cur = *this; + while (!T::isKind(Cur)) { + if (auto PTL = Cur.getAs<ParenTypeLoc>()) + Cur = PTL.getInnerLoc(); + else if (auto ATL = Cur.getAs<AttributedTypeLoc>()) + Cur = ATL.getModifiedLoc(); + else if (auto ETL = Cur.getAs<ElaboratedTypeLoc>()) + Cur = ETL.getNamedTypeLoc(); + else if (auto ATL = Cur.getAs<AdjustedTypeLoc>()) + Cur = ATL.getOriginalLoc(); + else + break; + } + return Cur.getAs<T>(); +} } #endif diff --git a/tools/clang/include/clang/Basic/TargetInfo.h b/tools/clang/include/clang/Basic/TargetInfo.h index 63434a3..2b58a23 100644 --- a/tools/clang/include/clang/Basic/TargetInfo.h +++ b/tools/clang/include/clang/Basic/TargetInfo.h @@ -610,8 +610,16 @@ class TargetInfo : public RefCountedBase<TargetInfo> { /// \brief Returns the "normalized" GCC register name. /// - /// For example, on x86 it will return "ax" when "eax" is passed in. - StringRef getNormalizedGCCRegisterName(StringRef Name) const; + /// ReturnCanonical true will return the register name without any additions + /// such as "{}" or "%" in its canonical form, for example: + /// ReturnCanonical = true and Name = "rax", will return "ax". + StringRef getNormalizedGCCRegisterName(StringRef Name, + bool ReturnCanonical = false) const; + + virtual StringRef getConstraintRegister(const StringRef &Constraint, + const StringRef &Expression) const { + return ""; + } struct ConstraintInfo { enum { @@ -1010,7 +1018,7 @@ class TargetInfo : public RefCountedBase<TargetInfo> { /// \brief Set supported OpenCL extensions as written on command line virtual void setOpenCLExtensionOpts() { for (const auto &Ext : getTargetOpts().OpenCLExtensionsAsWritten) { - getTargetOpts().SupportedOpenCLOptions.set(Ext); + getTargetOpts().SupportedOpenCLOptions.support(Ext); } } @@ -1029,6 +1037,21 @@ class TargetInfo : public RefCountedBase<TargetInfo> { return LangAS::opencl_global; } + /// \returns Target specific vtbl ptr address space. + virtual unsigned getVtblPtrAddressSpace() const { + return 0; + } + + /// \returns If a target requires an address within a target specific address + /// space \p AddressSpace to be converted in order to be used, then return the + /// corresponding target specific DWARF address space. + /// + /// \returns Otherwise return None and no conversion will be emitted in the + /// DWARF. + virtual Optional<unsigned> getDWARFAddressSpace(unsigned AddressSpace) const { + return None; + } + /// \brief Check the target is valid after it is fully initialized.
virtual bool validateTarget(DiagnosticsEngine &Diags) const { return true; diff --git a/tools/clang/include/clang/Sema/DeclSpec.h b/tools/clang/include/clang/Sema/DeclSpec.h index 64d2d08..a479b46 100644 --- a/tools/clang/include/clang/Sema/DeclSpec.h +++ b/tools/clang/include/clang/Sema/DeclSpec.h @@ -520,7 +520,7 @@ class DeclSpec { SourceRange getTypeofParensRange() const { return TypeofParensRange; } void setTypeofParensRange(SourceRange range) { TypeofParensRange = range; } - bool containsPlaceholderType() const { + bool hasAutoTypeSpec() const { return (TypeSpecType == TST_auto || TypeSpecType == TST_auto_type || TypeSpecType == TST_decltype_auto); } @@ -909,7 +909,9 @@ class UnqualifiedId { /// \brief A template-id, e.g., f. IK_TemplateId, /// \brief An implicit 'self' parameter - IK_ImplicitSelfParam + IK_ImplicitSelfParam, + /// \brief A deduction-guide name (a template-name) + IK_DeductionGuideName } Kind; struct OFI { @@ -929,8 +931,8 @@ class UnqualifiedId { /// \brief Anonymous union that holds extra data associated with the /// parsed unqualified-id. union { - /// \brief When Kind == IK_Identifier, the parsed identifier, or when Kind - /// == IK_UserLiteralId, the identifier suffix. + /// \brief When Kind == IK_Identifier, the parsed identifier, or when + /// Kind == IK_UserLiteralId, the identifier suffix. IdentifierInfo *Identifier; /// \brief When Kind == IK_OperatorFunctionId, the overloaded operator @@ -948,6 +950,9 @@ class UnqualifiedId { /// \brief When Kind == IK_DestructorName, the type referred to by the /// class-name. UnionParsedType DestructorName; + + /// \brief When Kind == IK_DeductionGuideName, the parsed template-name. + UnionParsedTemplateTy TemplateName; /// \brief When Kind == IK_TemplateId or IK_ConstructorTemplateId, /// the template-id annotation that contains the template name and @@ -1086,6 +1091,18 @@ class UnqualifiedId { /// \p TemplateId and will free it on destruction. void setTemplateId(TemplateIdAnnotation *TemplateId); + /// \brief Specify that this unqualified-id was parsed as a template-name for + /// a deduction-guide. + /// + /// \param Template The parsed template-name. + /// \param TemplateLoc The location of the parsed template-name. + void setDeductionGuideName(ParsedTemplateTy Template, + SourceLocation TemplateLoc) { + Kind = IK_DeductionGuideName; + TemplateName = Template; + StartLocation = EndLocation = TemplateLoc; + } + /// \brief Return the source range that covers this unqualified-id. SourceRange getSourceRange() const LLVM_READONLY { return SourceRange(StartLocation, EndLocation); @@ -1710,6 +1727,7 @@ class Declarator { ObjCParameterContext,// An ObjC method parameter type. KNRTypeListContext, // K&R type definition list for formals. TypeNameContext, // Abstract declarator for types. + FunctionalCastContext, // Type in a C++ functional cast expression. MemberContext, // Struct/Union field. BlockContext, // Declaration within a block in a function. ForContext, // Declaration within first part of a for loop. 
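// The FunctionalCastContext and IK_DeductionGuideName additions above serve
// C++1z class template argument deduction. A sketch of the surface syntax
// involved follows; the names are made up for illustration and are not from
// the patch.

    #include <string>

    template <class T> struct Wrapper {
      Wrapper(T) {}
    };

    // A deduction guide names the template itself in a declarator position;
    // the parser records it with the new IK_DeductionGuideName kind.
    Wrapper(const char *) -> Wrapper<std::string>;

    // A template-name may now appear as the type of a functional cast; the
    // contained type is parsed in FunctionalCastContext and deduced later.
    auto w = Wrapper(42); // deduces Wrapper<int>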
@@ -1912,6 +1930,7 @@ class Declarator { return false; case TypeNameContext: + case FunctionalCastContext: case AliasDeclContext: case AliasTemplateContext: case PrototypeContext: @@ -1952,6 +1971,7 @@ class Declarator { return true; case TypeNameContext: + case FunctionalCastContext: case CXXNewContext: case AliasDeclContext: case AliasTemplateContext: @@ -1984,6 +2004,7 @@ class Declarator { case CXXCatchContext: case ObjCCatchContext: case TypeNameContext: + case FunctionalCastContext: case ConversionIdContext: case ObjCParameterContext: case ObjCResultContext: @@ -2022,6 +2043,7 @@ class Declarator { // These contexts don't allow any kind of non-abstract declarator. case KNRTypeListContext: case TypeNameContext: + case FunctionalCastContext: case AliasDeclContext: case AliasTemplateContext: case LambdaExprParameterContext: @@ -2079,6 +2101,7 @@ class Declarator { case CXXCatchContext: case ObjCCatchContext: case TypeNameContext: + case FunctionalCastContext: // FIXME case CXXNewContext: case AliasDeclContext: case AliasTemplateContext: @@ -2280,6 +2303,7 @@ class Declarator { case ConditionContext: case KNRTypeListContext: case TypeNameContext: + case FunctionalCastContext: case AliasDeclContext: case AliasTemplateContext: case PrototypeContext: @@ -2313,6 +2337,16 @@ class Declarator { return true; } + /// \brief Determine whether a trailing return type was written (at any + /// level) within this declarator. + bool hasTrailingReturnType() const { + for (const auto &Chunk : type_objects()) + if (Chunk.Kind == DeclaratorChunk::Function && + Chunk.Fun.hasTrailingReturnType()) + return true; + return false; + } + /// takeAttributes - Takes attributes from the given parsed-attributes /// set and add them to this declarator. /// diff --git a/tools/clang/include/clang/Serialization/ASTBitCodes.h b/tools/clang/include/clang/Serialization/ASTBitCodes.h index 0473401..105afa4 100644 --- a/tools/clang/include/clang/Serialization/ASTBitCodes.h +++ b/tools/clang/include/clang/Serialization/ASTBitCodes.h @@ -344,7 +344,7 @@ namespace clang { /// /// The TYPE_OFFSET constant describes the record that occurs /// within the AST block. The record itself is an array of offsets that - /// point into the declarations and types block (identified by + /// point into the declarations and types block (identified by /// DECLTYPES_BLOCK_ID). The index into the array is based on the ID /// of a type. For a given type ID @c T, the lower three bits of /// @c T are its qualifiers (const, volatile, restrict), as in @@ -446,10 +446,10 @@ namespace clang { /// \brief Record code for the set of ext_vector type names. EXT_VECTOR_DECLS = 16, - + /// \brief Record code for the array of unused file scoped decls. UNUSED_FILESCOPED_DECLS = 17, - + /// \brief Record code for the table of offsets to entries in the /// preprocessing record. PPD_ENTITIES_OFFSETS = 18, @@ -465,7 +465,7 @@ namespace clang { /// \brief Record code for an update to the TU's lexically contained /// declarations. TU_UPDATE_LEXICAL = 22, - + // ID 23 used to be for a list of local redeclarations. /// \brief Record code for declarations that Sema keeps references of. @@ -490,7 +490,7 @@ namespace clang { // ID 30 used to be a decl update record. These are now in the DECLTYPES // block. - + // ID 31 used to be a list of offsets to DECL_CXX_BASE_SPECIFIERS records. /// \brief Record code for \#pragma diagnostic mappings. @@ -498,7 +498,7 @@ namespace clang { /// \brief Record code for special CUDA declarations. 
CUDA_SPECIAL_DECL_REFS = 33, - + /// \brief Record code for header search information. HEADER_SEARCH_TABLE = 34, @@ -516,7 +516,7 @@ namespace clang { KNOWN_NAMESPACES = 38, /// \brief Record code for the remapping information used to relate - /// loaded modules to the various offsets and IDs(e.g., source location + /// loaded modules to the various offsets and IDs(e.g., source location /// offests, declaration and type IDs) that are used in that module to /// refer to other modules. MODULE_OFFSET_MAP = 39, @@ -525,20 +525,20 @@ namespace clang { /// which stores information about \#line directives. SOURCE_MANAGER_LINE_TABLE = 40, - /// \brief Record code for map of Objective-C class definition IDs to the + /// \brief Record code for map of Objective-C class definition IDs to the /// ObjC categories in a module that are attached to that class. OBJC_CATEGORIES_MAP = 41, /// \brief Record code for a file sorted array of DeclIDs in a module. FILE_SORTED_DECLS = 42, - + /// \brief Record code for an array of all of the (sub)modules that were /// imported by the AST file. IMPORTED_MODULES = 43, - + // ID 44 used to be a table of merged canonical declarations. // ID 45 used to be a list of declaration IDs of local redeclarations. - + /// \brief Record code for the array of Objective-C categories (including /// extensions). /// @@ -585,6 +585,14 @@ namespace clang { /// \brief Number of unmatched #pragma clang cuda_force_host_device begin /// directives we've seen. CUDA_PRAGMA_FORCE_HOST_DEVICE_DEPTH = 57, + + /// \brief Record code for types associated with OpenCL extensions. + OPENCL_EXTENSION_TYPES = 58, + + /// \brief Record code for declarations associated with OpenCL extensions. + OPENCL_EXTENSION_DECLS = 59, + + MODULAR_CODEGEN_DECLS = 60, }; /// \brief Record types used within a source manager block. @@ -795,14 +803,12 @@ namespace clang { PREDEF_TYPE_SAMPLER_ID = 39, /// \brief OpenCL queue type. PREDEF_TYPE_QUEUE_ID = 40, - /// \brief OpenCL ndrange type. - PREDEF_TYPE_NDRANGE_ID = 41, /// \brief OpenCL reserve_id type. - PREDEF_TYPE_RESERVE_ID_ID = 42, + PREDEF_TYPE_RESERVE_ID_ID = 41, /// \brief The placeholder type for OpenMP array section. - PREDEF_TYPE_OMP_ARRAY_SECTION = 43, + PREDEF_TYPE_OMP_ARRAY_SECTION = 42, /// \brief The '__float128' type - PREDEF_TYPE_FLOAT128_ID = 44, + PREDEF_TYPE_FLOAT128_ID = 43, /// \brief OpenCL image types with auto numeration #define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ PREDEF_TYPE_##Id##_ID, @@ -912,7 +918,9 @@ namespace clang { /// \brief A PipeType record. TYPE_PIPE = 43, /// \brief An ObjCTypeParamType record. - TYPE_OBJC_TYPE_PARAM = 44 + TYPE_OBJC_TYPE_PARAM = 44, + /// \brief A DeducedTemplateSpecializationType record. + TYPE_DEDUCED_TEMPLATE_SPECIALIZATION = 45 }; /// \brief The type IDs for special types constructed by semantic @@ -1107,6 +1115,8 @@ namespace clang { DECL_NAMESPACE_ALIAS, /// \brief A UsingDecl record. DECL_USING, + /// \brief A UsingPackDecl record. + DECL_USING_PACK, /// \brief A UsingShadowDecl record. DECL_USING_SHADOW, /// \brief A ConstructorUsingShadowDecl record. @@ -1123,6 +1133,8 @@ namespace clang { DECL_EXPORT, /// \brief A CXXRecordDecl record. DECL_CXX_RECORD, + /// \brief A CXXDeductionGuideDecl record. + DECL_CXX_DEDUCTION_GUIDE, /// \brief A CXXMethodDecl record. DECL_CXX_METHOD, /// \brief A CXXConstructorDecl record. 
@@ -1515,6 +1527,11 @@ namespace clang { STMT_OMP_TEAMS_DISTRIBUTE_SIMD_DIRECTIVE, STMT_OMP_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_DIRECTIVE, STMT_OMP_TEAMS_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE, + STMT_OMP_TARGET_TEAMS_DIRECTIVE, + STMT_OMP_TARGET_TEAMS_DISTRIBUTE_DIRECTIVE, + STMT_OMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE, + STMT_OMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_DIRECTIVE, + STMT_OMP_TARGET_TEAMS_DISTRIBUTE_SIMD_DIRECTIVE, EXPR_OMP_ARRAY_SECTION, // ARC @@ -1621,7 +1638,8 @@ namespace clang { IdentifierInfo *getIdentifier() const { assert(Kind == DeclarationName::Identifier || - Kind == DeclarationName::CXXLiteralOperatorName); + Kind == DeclarationName::CXXLiteralOperatorName || + Kind == DeclarationName::CXXDeductionGuideName); return (IdentifierInfo *)Data; } Selector getSelector() const { diff --git a/tools/clang/lib/AST/ASTContext.cpp b/tools/clang/lib/AST/ASTContext.cpp index a418b08..88c46db 100644 --- a/tools/clang/lib/AST/ASTContext.cpp +++ b/tools/clang/lib/AST/ASTContext.cpp @@ -704,8 +704,8 @@ static const LangAS::Map *getAddressSpaceMap(const TargetInfo &T, // language-specific address space. static const unsigned FakeAddrSpaceMap[] = { 1, // opencl_global - 2, // opencl_local - 3, // opencl_constant + 3, // opencl_local + 2, // opencl_constant 4, // opencl_generic 5, // cuda_device 6, // cuda_constant @@ -1184,7 +1184,6 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target, InitBuiltinType(OCLEventTy, BuiltinType::OCLEvent); InitBuiltinType(OCLClkEventTy, BuiltinType::OCLClkEvent); InitBuiltinType(OCLQueueTy, BuiltinType::OCLQueue); - InitBuiltinType(OCLNDRangeTy, BuiltinType::OCLNDRange); InitBuiltinType(OCLReserveIDTy, BuiltinType::OCLReserveID); } @@ -1287,9 +1286,8 @@ void ASTContext::setClassScopeSpecializationPattern(FunctionDecl *FD, } NamedDecl * -ASTContext::getInstantiatedFromUsingDecl(UsingDecl *UUD) { - llvm::DenseMap::const_iterator Pos - = InstantiatedFromUsingDecl.find(UUD); +ASTContext::getInstantiatedFromUsingDecl(NamedDecl *UUD) { + auto Pos = InstantiatedFromUsingDecl.find(UUD); if (Pos == InstantiatedFromUsingDecl.end()) return nullptr; @@ -1297,11 +1295,15 @@ ASTContext::getInstantiatedFromUsingDecl(UsingDecl *UUD) { } void -ASTContext::setInstantiatedFromUsingDecl(UsingDecl *Inst, NamedDecl *Pattern) { +ASTContext::setInstantiatedFromUsingDecl(NamedDecl *Inst, NamedDecl *Pattern) { assert((isa(Pattern) || isa(Pattern) || isa(Pattern)) && "pattern decl is not a using decl"); + assert((isa(Inst) || + isa(Inst) || + isa(Inst)) && + "instantiation did not produce a using decl"); assert(!InstantiatedFromUsingDecl[Inst] && "pattern already exists"); InstantiatedFromUsingDecl[Inst] = Pattern; } @@ -1472,7 +1474,9 @@ CharUnits ASTContext::getDeclAlign(const Decl *D, bool ForAlignof) const { T = getPointerType(RT->getPointeeType()); } QualType BaseT = getBaseElementType(T); - if (!BaseT->isIncompleteType() && !T->isFunctionType()) { + if (T->isFunctionType()) + Align = getTypeInfoImpl(T.getTypePtr()).Align; + else if (!BaseT->isIncompleteType()) { // Adjust alignments of declarations with array type by the // large-array alignment on the target. 
if (const ArrayType *arrayType = getAsArrayType(T)) { @@ -1486,6 +1490,8 @@ CharUnits ASTContext::getDeclAlign(const Decl *D, bool ForAlignof) const { } } Align = std::max(Align, getPreferredTypeAlign(T.getTypePtr())); + if (BaseT.getQualifiers().hasUnaligned()) + Align = Target->getCharWidth(); if (const VarDecl *VD = dyn_cast(D)) { if (VD->hasGlobalStorage() && !ForAlignof) Align = std::max(Align, getTargetInfo().getMinGlobalAlign()); @@ -1792,7 +1798,6 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const { case BuiltinType::OCLEvent: case BuiltinType::OCLClkEvent: case BuiltinType::OCLQueue: - case BuiltinType::OCLNDRange: case BuiltinType::OCLReserveID: // Currently these types are pointers to opaque types. Width = Target->getPointerWidth(0); @@ -1894,8 +1899,9 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const { return getTypeInfo(cast(T)-> getReplacementType().getTypePtr()); - case Type::Auto: { - const AutoType *A = cast(T); + case Type::Auto: + case Type::DeducedTemplateSpecialization: { + const DeducedType *A = cast(T); assert(!A->getDeducedType().isNull() && "cannot request the size of an undeduced or dependent auto type"); return getTypeInfo(A->getDeducedType().getTypePtr()); @@ -2782,6 +2788,7 @@ QualType ASTContext::getVariableArrayDecayedType(QualType type) const { case Type::TemplateTypeParm: case Type::SubstTemplateTypeParmPack: case Type::Auto: + case Type::DeducedTemplateSpecialization: case Type::PackExpansion: llvm_unreachable("type should never be variably-modified"); @@ -3802,12 +3809,8 @@ QualType ASTContext::getDependentNameType(ElaboratedTypeKeyword Keyword, QualType Canon) const { if (Canon.isNull()) { NestedNameSpecifier *CanonNNS = getCanonicalNestedNameSpecifier(NNS); - ElaboratedTypeKeyword CanonKeyword = Keyword; - if (Keyword == ETK_None) - CanonKeyword = ETK_Typename; - - if (CanonNNS != NNS || CanonKeyword != Keyword) - Canon = getDependentNameType(CanonKeyword, CanonNNS, Name); + if (CanonNNS != NNS) + Canon = getDependentNameType(Keyword, CanonNNS, Name); } llvm::FoldingSetNodeID ID; @@ -3891,6 +3894,47 @@ ASTContext::getDependentTemplateSpecializationType( return QualType(T, 0); } +TemplateArgument ASTContext::getInjectedTemplateArg(NamedDecl *Param) { + TemplateArgument Arg; + if (auto *TTP = dyn_cast(Param)) { + QualType ArgType = getTypeDeclType(TTP); + if (TTP->isParameterPack()) + ArgType = getPackExpansionType(ArgType, None); + + Arg = TemplateArgument(ArgType); + } else if (auto *NTTP = dyn_cast(Param)) { + Expr *E = new (*this) DeclRefExpr( + NTTP, /*enclosing*/false, + NTTP->getType().getNonLValueExprType(*this), + Expr::getValueKindForType(NTTP->getType()), NTTP->getLocation()); + + if (NTTP->isParameterPack()) + E = new (*this) PackExpansionExpr(DependentTy, E, NTTP->getLocation(), + None); + Arg = TemplateArgument(E); + } else { + auto *TTP = cast(Param); + if (TTP->isParameterPack()) + Arg = TemplateArgument(TemplateName(TTP), Optional()); + else + Arg = TemplateArgument(TemplateName(TTP)); + } + + if (Param->isTemplateParameterPack()) + Arg = TemplateArgument::CreatePackCopy(*this, Arg); + + return Arg; +} + +void +ASTContext::getInjectedTemplateArgs(const TemplateParameterList *Params, + SmallVectorImpl &Args) { + Args.reserve(Args.size() + Params->size()); + + for (NamedDecl *Param : *Params) + Args.push_back(getInjectedTemplateArg(Param)); +} + QualType ASTContext::getPackExpansionType(QualType Pattern, Optional NumExpansions) { llvm::FoldingSetNodeID ID; @@ -4418,6 +4462,28 @@ QualType ASTContext::getAutoType(QualType 
DeducedType, AutoTypeKeyword Keyword, return QualType(AT, 0); } +/// Return the uniqued reference to the deduced template specialization type +/// which has been deduced to the given type, or to the canonical undeduced +/// such type, or the canonical deduced-but-dependent such type. +QualType ASTContext::getDeducedTemplateSpecializationType( + TemplateName Template, QualType DeducedType, bool IsDependent) const { + // Look in the folding set for an existing type. + void *InsertPos = nullptr; + llvm::FoldingSetNodeID ID; + DeducedTemplateSpecializationType::Profile(ID, Template, DeducedType, + IsDependent); + if (DeducedTemplateSpecializationType *DTST = + DeducedTemplateSpecializationTypes.FindNodeOrInsertPos(ID, InsertPos)) + return QualType(DTST, 0); + + DeducedTemplateSpecializationType *DTST = new (*this, TypeAlignment) + DeducedTemplateSpecializationType(Template, DeducedType, IsDependent); + Types.push_back(DTST); + if (InsertPos) + DeducedTemplateSpecializationTypes.InsertNode(DTST, InsertPos); + return QualType(DTST, 0); +} + /// getAtomicType - Return the uniqued reference to the atomic type for /// the given value type. QualType ASTContext::getAtomicType(QualType T) const { @@ -5906,7 +5972,6 @@ static char getObjCEncodingForPrimitiveKind(const ASTContext *C, case BuiltinType::OCLEvent: case BuiltinType::OCLClkEvent: case BuiltinType::OCLQueue: - case BuiltinType::OCLNDRange: case BuiltinType::OCLReserveID: case BuiltinType::OCLSampler: case BuiltinType::Dependent: @@ -6321,6 +6386,7 @@ void ASTContext::getObjCEncodingForTypeImpl(QualType T, std::string& S, // We could see an undeduced auto type here during error recovery. // Just ignore it. case Type::Auto: + case Type::DeducedTemplateSpecialization: return; case Type::Pipe: @@ -8040,7 +8106,8 @@ QualType ASTContext::mergeTypes(QualType LHS, QualType RHS, // mismatch. if (LQuals.getCVRQualifiers() != RQuals.getCVRQualifiers() || LQuals.getAddressSpace() != RQuals.getAddressSpace() || - LQuals.getObjCLifetime() != RQuals.getObjCLifetime()) + LQuals.getObjCLifetime() != RQuals.getObjCLifetime() || + LQuals.hasUnaligned() != RQuals.hasUnaligned()) return QualType(); // Exactly one GC qualifier difference is allowed: __strong is @@ -8120,6 +8187,7 @@ QualType ASTContext::mergeTypes(QualType LHS, QualType RHS, llvm_unreachable("Non-canonical and dependent types shouldn't get here"); case Type::Auto: + case Type::DeducedTemplateSpecialization: case Type::LValueReference: case Type::RValueReference: case Type::MemberPointer: @@ -8755,8 +8823,8 @@ QualType ASTContext::GetBuiltinType(unsigned Id, bool Variadic = (TypeStr[0] == '.'); - // We really shouldn't be making a no-proto type here, especially in C++. - if (ArgTypes.empty() && Variadic) + // We really shouldn't be making a no-proto type here. 
+ if (ArgTypes.empty() && Variadic && !getLangOpts().CPlusPlus) return getFunctionNoProtoType(ResType, EI); FunctionProtoType::ExtProtoInfo EPI; @@ -8774,7 +8842,7 @@ static GVALinkage basicGVALinkageForFunction(const ASTContext &Context, if (!FD->isExternallyVisible()) return GVA_Internal; - GVALinkage External = GVA_StrongExternal; + GVALinkage External; switch (FD->getTemplateSpecializationKind()) { case TSK_Undeclared: case TSK_ExplicitSpecialization: @@ -8846,8 +8914,22 @@ static GVALinkage adjustGVALinkageForAttributes(const ASTContext &Context, } GVALinkage ASTContext::GetGVALinkageForFunction(const FunctionDecl *FD) const { - return adjustGVALinkageForAttributes( + auto L = adjustGVALinkageForAttributes( *this, basicGVALinkageForFunction(*this, FD), FD); + auto EK = ExternalASTSource::EK_ReplyHazy; + if (auto *Ext = getExternalSource()) + EK = Ext->hasExternalDefinitions(FD->getOwningModuleID()); + switch (EK) { + case ExternalASTSource::EK_Never: + if (L == GVA_DiscardableODR) + return GVA_StrongODR; + break; + case ExternalASTSource::EK_Always: + return GVA_AvailableExternally; + case ExternalASTSource::EK_ReplyHazy: + break; + } + return L; } static GVALinkage basicGVALinkageForVariable(const ASTContext &Context, @@ -8856,22 +8938,30 @@ static GVALinkage basicGVALinkageForVariable(const ASTContext &Context, return GVA_Internal; if (VD->isStaticLocal()) { - GVALinkage StaticLocalLinkage = GVA_DiscardableODR; const DeclContext *LexicalContext = VD->getParentFunctionOrMethod(); while (LexicalContext && !isa(LexicalContext)) LexicalContext = LexicalContext->getLexicalParent(); - // Let the static local variable inherit its linkage from the nearest - // enclosing function. - if (LexicalContext) - StaticLocalLinkage = - Context.GetGVALinkageForFunction(cast(LexicalContext)); + // ObjC Blocks can create local variables that don't have a FunctionDecl + // LexicalContext. + if (!LexicalContext) + return GVA_DiscardableODR; + + // Otherwise, let the static local variable inherit its linkage from the + // nearest enclosing function. + auto StaticLocalLinkage = + Context.GetGVALinkageForFunction(cast(LexicalContext)); - // GVA_StrongODR function linkage is stronger than what we need, - // downgrade to GVA_DiscardableODR. - // This allows us to discard the variable if we never end up needing it. - return StaticLocalLinkage == GVA_StrongODR ? GVA_DiscardableODR - : StaticLocalLinkage; + // Itanium ABI 5.2.2: "Each COMDAT group [for a static local variable] must + // be emitted in any object with references to the symbol for the object it + // contains, whether inline or out-of-line." + // Similar behavior is observed with MSVC. An alternative ABI could use + // StrongODR/AvailableExternally to match the function, but none are + // known/supported currently. + if (StaticLocalLinkage == GVA_StrongODR || + StaticLocalLinkage == GVA_AvailableExternally) + return GVA_DiscardableODR; + return StaticLocalLinkage; } // MSVC treats in-class initialized static data members as definitions. 
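// The static-local linkage rule above, shown on a concrete case. This is a
// sketch of the scenario being handled, not code from the patch.

    // "nextId" has discardable linkage (GVA_DiscardableODR, i.e. emitted
    // linkonce_odr), and per the Itanium ABI 5.2.2 wording quoted above the
    // COMDAT holding "counter" must be emitted by every object referencing
    // it, so the variable's linkage is capped at GVA_DiscardableODR even
    // when the enclosing function would otherwise be GVA_StrongODR.
    inline int nextId() {
      static int counter = 0;
      return ++counter;
    }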
@@ -8924,7 +9014,7 @@ GVALinkage ASTContext::GetGVALinkageForVariable(const VarDecl *VD) { *this, basicGVALinkageForVariable(*this, VD), VD); } -bool ASTContext::DeclMustBeEmitted(const Decl *D) { +bool ASTContext::DeclMustBeEmitted(const Decl *D, bool ForModularCodegen) { if (const VarDecl *VD = dyn_cast<VarDecl>(D)) { if (!VD->isFileVarDecl()) return false; @@ -8988,10 +9078,15 @@ bool ASTContext::DeclMustBeEmitted(const Decl *D) { } } + GVALinkage Linkage = GetGVALinkageForFunction(FD); + + if (Linkage == GVA_DiscardableODR && ForModularCodegen) + return true; + // static, static inline, always_inline, and extern inline functions can // always be deferred. Normal inline functions can be deferred in C99/C++. // Implicit template instantiations can also be deferred in C++. - return !isDiscardableGVALinkage(GetGVALinkageForFunction(FD)); + return !isDiscardableGVALinkage(Linkage); } const VarDecl *VD = cast<VarDecl>(D); @@ -9011,7 +9106,8 @@ bool ASTContext::DeclMustBeEmitted(const Decl *D) { // Variables that have initialization with side-effects are required. if (VD->getInit() && VD->getInit()->HasSideEffects(*this) && - !VD->evaluateValue()) + // We can get a value-dependent initializer during error recovery. + (VD->getInit()->isValueDependent() || !VD->evaluateValue())) return true; // Likewise, variables with tuple-like bindings are required if their diff --git a/tools/clang/lib/AST/ExprConstant.cpp b/tools/clang/lib/AST/ExprConstant.cpp index 2876525..a22446a 100644 --- a/tools/clang/lib/AST/ExprConstant.cpp +++ b/tools/clang/lib/AST/ExprConstant.cpp @@ -109,19 +109,57 @@ namespace { return getAsBaseOrMember(E).getInt(); } + /// Given a CallExpr, try to get the alloc_size attribute. May return null. + static const AllocSizeAttr *getAllocSizeAttr(const CallExpr *CE) { + const FunctionDecl *Callee = CE->getDirectCallee(); + return Callee ? Callee->getAttr<AllocSizeAttr>() : nullptr; + } + + /// Attempts to unwrap a CallExpr (with an alloc_size attribute) from an Expr. + /// This will look through a single cast. + /// + /// Returns null if we couldn't unwrap a function with alloc_size. + static const CallExpr *tryUnwrapAllocSizeCall(const Expr *E) { + if (!E->getType()->isPointerType()) + return nullptr; + + E = E->IgnoreParens(); + // If we're doing a variable assignment from e.g. malloc(N), there will + // probably be a cast of some kind. Ignore it. + if (const auto *Cast = dyn_cast<CastExpr>(E)) + E = Cast->getSubExpr()->IgnoreParens(); + + if (const auto *CE = dyn_cast<CallExpr>(E)) + return getAllocSizeAttr(CE) ? CE : nullptr; + return nullptr; + } + + /// Determines whether or not the given Base contains a call to a function + /// with the alloc_size attribute. + static bool isBaseAnAllocSizeCall(APValue::LValueBase Base) { + const auto *E = Base.dyn_cast<const Expr *>(); + return E && E->getType()->isPointerType() && tryUnwrapAllocSizeCall(E); + } + + /// Determines if an LValue with the given LValueBase will have an unsized + /// array in its designator. /// Find the path length and type of the most-derived subobject in the given /// path, and find the size of the containing array, if any. - static - unsigned findMostDerivedSubobject(ASTContext &Ctx, QualType Base, - ArrayRef<APValue::LValuePathEntry> Path, - uint64_t &ArraySize, QualType &Type, - bool &IsArray) { + static unsigned + findMostDerivedSubobject(ASTContext &Ctx, APValue::LValueBase Base, + ArrayRef<APValue::LValuePathEntry> Path, + uint64_t &ArraySize, QualType &Type, bool &IsArray) { + // This only accepts LValueBases from APValues, and APValues don't support + // arrays that lack size info.
+ assert(!isBaseAnAllocSizeCall(Base) && + "Unsized arrays shouldn't appear here"); unsigned MostDerivedLength = 0; - Type = Base; + Type = getType(Base); + for (unsigned I = 0, N = Path.size(); I != N; ++I) { if (Type->isArrayType()) { const ConstantArrayType *CAT = - cast(Ctx.getAsArrayType(Type)); + cast(Ctx.getAsArrayType(Type)); Type = CAT->getElementType(); ArraySize = CAT->getSize().getZExtValue(); MostDerivedLength = I + 1; @@ -162,17 +200,23 @@ namespace { /// Is this a pointer one past the end of an object? unsigned IsOnePastTheEnd : 1; + /// Indicator of whether the first entry is an unsized array. + unsigned FirstEntryIsAnUnsizedArray : 1; + /// Indicator of whether the most-derived object is an array element. unsigned MostDerivedIsArrayElement : 1; /// The length of the path to the most-derived object of which this is a /// subobject. - unsigned MostDerivedPathLength : 29; + unsigned MostDerivedPathLength : 28; /// The size of the array of which the most-derived object is an element. /// This will always be 0 if the most-derived object is not an array /// element. 0 is not an indicator of whether or not the most-derived object /// is an array, however, because 0-length arrays are allowed. + /// + /// If the current array is an unsized array, the value of this is + /// undefined. uint64_t MostDerivedArraySize; /// The type of the most derived object referred to by this address. @@ -187,23 +231,24 @@ namespace { explicit SubobjectDesignator(QualType T) : Invalid(false), IsOnePastTheEnd(false), - MostDerivedIsArrayElement(false), MostDerivedPathLength(0), - MostDerivedArraySize(0), MostDerivedType(T) {} + FirstEntryIsAnUnsizedArray(false), MostDerivedIsArrayElement(false), + MostDerivedPathLength(0), MostDerivedArraySize(0), + MostDerivedType(T) {} SubobjectDesignator(ASTContext &Ctx, const APValue &V) : Invalid(!V.isLValue() || !V.hasLValuePath()), IsOnePastTheEnd(false), - MostDerivedIsArrayElement(false), MostDerivedPathLength(0), - MostDerivedArraySize(0) { + FirstEntryIsAnUnsizedArray(false), MostDerivedIsArrayElement(false), + MostDerivedPathLength(0), MostDerivedArraySize(0) { + assert(V.isLValue() && "Non-LValue used to make an LValue designator?"); if (!Invalid) { IsOnePastTheEnd = V.isLValueOnePastTheEnd(); ArrayRef VEntries = V.getLValuePath(); Entries.insert(Entries.end(), VEntries.begin(), VEntries.end()); if (V.getLValueBase()) { bool IsArray = false; - MostDerivedPathLength = - findMostDerivedSubobject(Ctx, getType(V.getLValueBase()), - V.getLValuePath(), MostDerivedArraySize, - MostDerivedType, IsArray); + MostDerivedPathLength = findMostDerivedSubobject( + Ctx, V.getLValueBase(), V.getLValuePath(), MostDerivedArraySize, + MostDerivedType, IsArray); MostDerivedIsArrayElement = IsArray; } } @@ -214,12 +259,26 @@ namespace { Entries.clear(); } + /// Determine whether the most derived subobject is an array without a + /// known bound. + bool isMostDerivedAnUnsizedArray() const { + assert(!Invalid && "Calling this makes no sense on invalid designators"); + return Entries.size() == 1 && FirstEntryIsAnUnsizedArray; + } + + /// Determine what the most derived array's size is. Results in an assertion + /// failure if the most derived array lacks a size. + uint64_t getMostDerivedArraySize() const { + assert(!isMostDerivedAnUnsizedArray() && "Unsized array has no size"); + return MostDerivedArraySize; + } + /// Determine whether this is a one-past-the-end pointer. 
bool isOnePastTheEnd() const { assert(!Invalid); if (IsOnePastTheEnd) return true; - if (MostDerivedIsArrayElement && + if (!isMostDerivedAnUnsizedArray() && MostDerivedIsArrayElement && Entries[MostDerivedPathLength - 1].ArrayIndex == MostDerivedArraySize) return true; return false; @@ -247,6 +306,21 @@ namespace { MostDerivedArraySize = CAT->getSize().getZExtValue(); MostDerivedPathLength = Entries.size(); } + /// Update this designator to refer to the first element within the array of + /// elements of type T. This is an array of unknown size. + void addUnsizedArrayUnchecked(QualType ElemTy) { + PathEntry Entry; + Entry.ArrayIndex = 0; + Entries.push_back(Entry); + + MostDerivedType = ElemTy; + MostDerivedIsArrayElement = true; + // The value in MostDerivedArraySize is undefined in this case. So, set it + // to an arbitrary value that's likely to loudly break things if it's + // used. + MostDerivedArraySize = std::numeric_limits::max() / 2; + MostDerivedPathLength = Entries.size(); + } /// Update this designator to refer to the given base or member of this /// object. void addDeclUnchecked(const Decl *D, bool Virtual = false) { @@ -276,30 +350,48 @@ namespace { MostDerivedArraySize = 2; MostDerivedPathLength = Entries.size(); } - void diagnosePointerArithmetic(EvalInfo &Info, const Expr *E, uint64_t N); + void diagnosePointerArithmetic(EvalInfo &Info, const Expr *E, APSInt N); /// Add N to the address of this subobject. - void adjustIndex(EvalInfo &Info, const Expr *E, uint64_t N) { - if (Invalid) return; - if (MostDerivedPathLength == Entries.size() && - MostDerivedIsArrayElement) { - Entries.back().ArrayIndex += N; - if (Entries.back().ArrayIndex > MostDerivedArraySize) { - diagnosePointerArithmetic(Info, E, Entries.back().ArrayIndex); - setInvalid(); - } + void adjustIndex(EvalInfo &Info, const Expr *E, APSInt N) { + if (Invalid || !N) return; + uint64_t TruncatedN = N.extOrTrunc(64).getZExtValue(); + if (isMostDerivedAnUnsizedArray()) { + // Can't verify -- trust that the user is doing the right thing (or if + // not, trust that the caller will catch the bad behavior). + // FIXME: Should we reject if this overflows, at least? + Entries.back().ArrayIndex += TruncatedN; return; } + // [expr.add]p4: For the purposes of these operators, a pointer to a // nonarray object behaves the same as a pointer to the first element of // an array of length one with the type of the object as its element type. - if (IsOnePastTheEnd && N == (uint64_t)-1) - IsOnePastTheEnd = false; - else if (!IsOnePastTheEnd && N == 1) - IsOnePastTheEnd = true; - else if (N != 0) { - diagnosePointerArithmetic(Info, E, uint64_t(IsOnePastTheEnd) + N); + bool IsArray = MostDerivedPathLength == Entries.size() && + MostDerivedIsArrayElement; + uint64_t ArrayIndex = + IsArray ? Entries.back().ArrayIndex : (uint64_t)IsOnePastTheEnd; + uint64_t ArraySize = + IsArray ? getMostDerivedArraySize() : (uint64_t)1; + + if (N < -(int64_t)ArrayIndex || N > ArraySize - ArrayIndex) { + // Calculate the actual index in a wide enough type, so we can include + // it in the note. 
@@ -333,6 +425,17 @@ namespace {
     /// Index - The call index of this call.
     unsigned Index;
 
+    // FIXME: Adding this to every 'CallStackFrame' may have a nontrivial impact
+    // on the overall stack usage of deeply-recursing constexpr evaluations.
+    // (We should cache this map rather than recomputing it repeatedly.)
+    // But let's try this and see how it goes; we can look into caching the map
+    // as a later change.
+
+    /// LambdaCaptureFields - Mapping from captured variables/this to
+    /// corresponding data members in the closure class.
+    llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
+    FieldDecl *LambdaThisCaptureField;
+
     CallStackFrame(EvalInfo &Info, SourceLocation CallLoc,
                    const FunctionDecl *Callee, const LValue *This,
                    APValue *Arguments);
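For context, the capture map added to CallStackFrame above is what lets the
evaluator resolve captured variables when C++17 constexpr lambdas are
evaluated; a minimal user-level sketch of the effect (illustrative only):

    constexpr int mul(int a, int b) {
      auto l = [a, b] { return a * b; };  // captures become closure fields
      return l();                         // resolved via LambdaCaptureFields
    }
    static_assert(mul(6, 7) == 42, "");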
@@ -524,9 +627,17 @@ namespace {
     /// gets a chance to look at it.
     EM_PotentialConstantExpressionUnevaluated,
 
-    /// Evaluate as a constant expression. Continue evaluating if we find a
-    /// MemberExpr with a base that can't be evaluated.
-    EM_DesignatorFold,
+    /// Evaluate as a constant expression. In certain scenarios, if:
+    /// - we find a MemberExpr with a base that can't be evaluated, or
+    /// - we find a variable initialized with a call to a function that has
+    ///   the alloc_size attribute on it
+    /// then we may consider evaluation to have succeeded.
+    ///
+    /// In either case, the LValue returned shall have an invalid base; in the
+    /// former, the base will be the invalid MemberExpr, in the latter, the
+    /// base will be either the alloc_size CallExpr or a CastExpr wrapping
+    /// said CallExpr.
+    EM_OffsetFold,
   } EvalMode;
 
   /// Are we checking whether the expression is a potential constant
@@ -628,7 +739,7 @@ namespace {
       case EM_PotentialConstantExpression:
       case EM_ConstantExpressionUnevaluated:
       case EM_PotentialConstantExpressionUnevaluated:
-      case EM_DesignatorFold:
+      case EM_OffsetFold:
         HasActiveDiagnostic = false;
         return OptionalDiagnostic();
       }
@@ -720,7 +831,7 @@ namespace {
       case EM_ConstantExpression:
       case EM_ConstantExpressionUnevaluated:
       case EM_ConstantFold:
-      case EM_DesignatorFold:
+      case EM_OffsetFold:
         return false;
       }
       llvm_unreachable("Missed EvalMode case");
@@ -739,7 +850,7 @@ namespace {
       case EM_EvaluateForOverflow:
       case EM_IgnoreSideEffects:
       case EM_ConstantFold:
-      case EM_DesignatorFold:
+      case EM_OffsetFold:
         return true;
 
       case EM_PotentialConstantExpression:
@@ -775,7 +886,7 @@ namespace {
       case EM_ConstantExpressionUnevaluated:
       case EM_ConstantFold:
       case EM_IgnoreSideEffects:
-      case EM_DesignatorFold:
+      case EM_OffsetFold:
         return false;
       }
       llvm_unreachable("Missed EvalMode case");
@@ -804,10 +915,6 @@ namespace {
       return KeepGoing;
     }
 
-    bool allowInvalidBaseExpr() const {
-      return EvalMode == EM_DesignatorFold;
-    }
-
     class ArrayInitLoopIndex {
       EvalInfo &Info;
       uint64_t OuterIndex;
@@ -856,11 +963,10 @@ namespace {
   struct FoldOffsetRAII {
     EvalInfo &Info;
     EvalInfo::EvaluationMode OldMode;
-    explicit FoldOffsetRAII(EvalInfo &Info, bool Subobject)
+    explicit FoldOffsetRAII(EvalInfo &Info)
         : Info(Info), OldMode(Info.EvalMode) {
       if (!Info.checkingPotentialConstantExpression())
-        Info.EvalMode = Subobject ? EvalInfo::EM_DesignatorFold
-                                  : EvalInfo::EM_ConstantFold;
+        Info.EvalMode = EvalInfo::EM_OffsetFold;
     }
 
     ~FoldOffsetRAII() { Info.EvalMode = OldMode; }
@@ -965,14 +1071,16 @@ bool SubobjectDesignator::checkSubobject(EvalInfo &Info, const Expr *E,
 }
 
 void SubobjectDesignator::diagnosePointerArithmetic(EvalInfo &Info,
-                                                    const Expr *E, uint64_t N) {
+                                                    const Expr *E, APSInt N) {
+  // If we're complaining, we must be able to statically determine the size of
+  // the most derived array.
   if (MostDerivedPathLength == Entries.size() && MostDerivedIsArrayElement)
     Info.CCEDiag(E, diag::note_constexpr_array_index)
-      << static_cast<int>(N) << /*array*/ 0
-      << static_cast<int>(MostDerivedArraySize);
+        << N << /*array*/ 0
+        << static_cast<unsigned>(getMostDerivedArraySize());
   else
     Info.CCEDiag(E, diag::note_constexpr_array_index)
-      << static_cast<int>(N) << /*non-array*/ 1;
+        << N << /*non-array*/ 1;
   setInvalid();
 }
 
@@ -1102,12 +1210,16 @@ namespace {
       if (Designator.Invalid)
         V = APValue(Base, Offset, APValue::NoLValuePath(), CallIndex,
                     IsNullPtr);
-      else
+      else {
+        assert(!InvalidBase && "APValues can't handle invalid LValue bases");
+        assert(!Designator.FirstEntryIsAnUnsizedArray &&
+               "Unsized array with a valid base?");
         V = APValue(Base, Offset, Designator.Entries,
                     Designator.IsOnePastTheEnd, CallIndex, IsNullPtr);
+      }
     }
     void setFrom(ASTContext &Ctx, const APValue &V) {
-      assert(V.isLValue());
+      assert(V.isLValue() && "Setting LValue from a non-LValue?");
       Base = V.getLValueBase();
       Offset = V.getLValueOffset();
       InvalidBase = false;
@@ -1118,6 +1230,15 @@ namespace {
 
     void set(APValue::LValueBase B, unsigned I = 0, bool BInvalid = false,
              bool IsNullPtr_ = false, uint64_t Offset_ = 0) {
+#ifndef NDEBUG
+      // We only allow a few types of invalid bases. Enforce that here.
+      if (BInvalid) {
+        const auto *E = B.get<const Expr *>();
+        assert((isa<MemberExpr>(E) || tryUnwrapAllocSizeCall(E)) &&
+               "Unexpected type of invalid base");
+      }
+#endif
+
       Base = B;
       Offset = CharUnits::fromQuantity(Offset_);
       InvalidBase = BInvalid;
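The two invalid-base shapes this mode permits correspond to folds like the
following (illustrative; `X`, `p`, and `my_malloc` are hypothetical):

    // MemberExpr base whose pointer operand can't be evaluated:
    extern struct X { char buf[32]; int i; } *p;
    unsigned long n = __builtin_object_size(p->buf + 4, 1);  // folds to 28
    // CallExpr base (possibly wrapped in a cast) from an alloc_size function:
    void *my_malloc(unsigned long) __attribute__((alloc_size(1)));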
@@ -1157,6 +1278,13 @@ namespace {
       if (checkSubobject(Info, E, isa<FieldDecl>(D) ? CSK_Field : CSK_Base))
         Designator.addDeclUnchecked(D, Virtual);
     }
+    void addUnsizedArray(EvalInfo &Info, QualType ElemTy) {
+      assert(Designator.Entries.empty() && getType(Base)->isPointerType());
+      assert(isBaseAnAllocSizeCall(Base) &&
+             "Only alloc_size bases can have unsized arrays");
+      Designator.FirstEntryIsAnUnsizedArray = true;
+      Designator.addUnsizedArrayUnchecked(ElemTy);
+    }
     void addArray(EvalInfo &Info, const Expr *E, const ConstantArrayType *CAT) {
       if (checkSubobject(Info, E, CSK_ArrayToPointer))
         Designator.addArrayUnchecked(CAT);
@@ -1168,14 +1296,24 @@ namespace {
     void clearIsNullPointer() {
       IsNullPtr = false;
     }
-    void adjustOffsetAndIndex(EvalInfo &Info, const Expr *E, uint64_t Index,
+    void adjustOffsetAndIndex(EvalInfo &Info, const Expr *E, APSInt Index,
                               CharUnits ElementSize) {
-      // Compute the new offset in the appropriate width.
-      Offset += Index * ElementSize;
-      if (Index && checkNullPointer(Info, E, CSK_ArrayIndex))
+      // An index of 0 has no effect. (In C, adding 0 to a null pointer is UB,
+      // but we're not required to diagnose it and it's valid in C++.)
+      if (!Index)
+        return;
+
+      // Compute the new offset in the appropriate width, wrapping at 64 bits.
+      // FIXME: When compiling for a 32-bit target, we should use 32-bit
+      // offsets.
+      uint64_t Offset64 = Offset.getQuantity();
+      uint64_t ElemSize64 = ElementSize.getQuantity();
+      uint64_t Index64 = Index.extOrTrunc(64).getZExtValue();
+      Offset = CharUnits::fromQuantity(Offset64 + ElemSize64 * Index64);
+
+      if (checkNullPointer(Info, E, CSK_ArrayIndex))
         Designator.adjustIndex(Info, E, Index);
-      if (Index)
-        clearIsNullPointer();
+      clearIsNullPointer();
     }
     void adjustOffset(CharUnits N) {
       Offset += N;
@@ -1287,8 +1425,10 @@ static bool Evaluate(APValue &Result, EvalInfo &Info, const Expr *E);
 static bool EvaluateInPlace(APValue &Result, EvalInfo &Info,
                             const LValue &This, const Expr *E,
                             bool AllowNonLiteralTypes = false);
-static bool EvaluateLValue(const Expr *E, LValue &Result, EvalInfo &Info);
-static bool EvaluatePointer(const Expr *E, LValue &Result, EvalInfo &Info);
+static bool EvaluateLValue(const Expr *E, LValue &Result, EvalInfo &Info,
+                           bool InvalidBaseOK = false);
+static bool EvaluatePointer(const Expr *E, LValue &Result, EvalInfo &Info,
+                            bool InvalidBaseOK = false);
 static bool EvaluateMemberPointer(const Expr *E, MemberPtr &Result,
                                   EvalInfo &Info);
 static bool EvaluateTemporary(const Expr *E, LValue &Result, EvalInfo &Info);
@@ -1297,13 +1437,24 @@ static bool EvaluateIntegerOrLValue(const Expr *E, APValue &Result,
                                     EvalInfo &Info);
 static bool EvaluateFloat(const Expr *E, APFloat &Result, EvalInfo &Info);
 static bool EvaluateComplex(const Expr *E, ComplexValue &Res, EvalInfo &Info);
-static bool EvaluateAtomic(const Expr *E, APValue &Result, EvalInfo &Info);
+static bool EvaluateAtomic(const Expr *E, const LValue *This, APValue &Result,
+                           EvalInfo &Info);
 static bool EvaluateAsRValue(EvalInfo &Info, const Expr *E, APValue &Result);
 
 //===----------------------------------------------------------------------===//
 // Misc utilities
 //===----------------------------------------------------------------------===//
 
+/// Negate an APSInt in place, converting it to a signed form if necessary, and
+/// preserving its value (by extending by up to one bit as needed).
+static void negateAsSigned(APSInt &Int) {
+  if (Int.isUnsigned() || Int.isMinSignedValue()) {
+    Int = Int.extend(Int.getBitWidth() + 1);
+    Int.setIsSigned(true);
+  }
+  Int = -Int;
+}
+
 /// Produce a string describing the given constexpr call.
 static void describeCall(CallStackFrame *Frame, raw_ostream &Out) {
   unsigned ArgIndex = 0;
@@ -1351,13 +1502,6 @@ static bool EvaluateIgnoredValue(EvalInfo &Info, const Expr *E) {
   return true;
 }
 
-/// Sign- or zero-extend a value to 64 bits. If it's already 64 bits, just
-/// return its existing value.
-static int64_t getExtValue(const APSInt &Value) {
-  return Value.isSigned() ? Value.getSExtValue()
-                          : static_cast<int64_t>(Value.getZExtValue());
-}
-
 /// Should this call expression be treated as a string literal?
 static bool IsStringLiteralCall(const CallExpr *E) {
   unsigned Builtin = E->getBuiltinCallee();
@@ -1520,8 +1664,17 @@ static bool CheckLiteralType(EvalInfo &Info, const Expr *E,
   // C++1y: A constant initializer for an object o [...] may also invoke
   // constexpr constructors for o and its subobjects even if those objects
   // are of non-literal class types.
-  if (Info.getLangOpts().CPlusPlus14 && This &&
-      Info.EvaluatingDecl == This->getLValueBase())
+  //
+  // C++11 missed this detail for aggregates, so classes like this:
+  //   struct foo_t { union { int i; volatile int j; } u; };
+  // are not (obviously) initializable like so:
+  //   __attribute__((__require_constant_initialization__))
+  //   static const foo_t x = {{0}};
+  // because "i" is a subobject with non-literal initialization (due to the
+  // volatile member of the union). See:
+  //   http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#1677
+  // Therefore, we use the C++1y behavior.
+  if (This && Info.EvaluatingDecl == This->getLValueBase())
     return true;
 
   // Prvalue constant expressions must be of literal types.
@@ -2104,7 +2257,7 @@ static bool HandleSizeof(EvalInfo &Info, SourceLocation Loc,
 /// \param Adjustment - The adjustment, in objects of type EltTy, to add.
 static bool HandleLValueArrayAdjustment(EvalInfo &Info, const Expr *E,
                                         LValue &LVal, QualType EltTy,
-                                        int64_t Adjustment) {
+                                        APSInt Adjustment) {
   CharUnits SizeOfPointee;
   if (!HandleSizeof(Info, E->getExprLoc(), EltTy, SizeOfPointee))
     return false;
@@ -2113,6 +2266,13 @@ static bool HandleLValueArrayAdjustment(EvalInfo &Info, const Expr *E,
   return true;
 }
 
+static bool HandleLValueArrayAdjustment(EvalInfo &Info, const Expr *E,
+                                        LValue &LVal, QualType EltTy,
+                                        int64_t Adjustment) {
+  return HandleLValueArrayAdjustment(Info, E, LVal, EltTy,
+                                     APSInt::get(Adjustment));
+}
+
 /// Update an lvalue to refer to a component of a complex number.
 /// \param Info - Information about the ongoing evaluation.
 /// \param LVal - The lvalue to be updated.
@@ -2131,6 +2291,10 @@ static bool HandleLValueComplexElement(EvalInfo &Info, const Expr *E,
   return true;
 }
 
+static bool handleLValueToRValueConversion(EvalInfo &Info, const Expr *Conv,
+                                           QualType Type, const LValue &LVal,
+                                           APValue &RVal);
+
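A quick illustration of negateAsSigned() from above, with the results worked
out by hand (values that have no negation at their original width force the
widening path):

    llvm::APSInt U(llvm::APInt(8, 255), /*isUnsigned=*/true);
    negateAsSigned(U);  // widened to 9 bits, signed, value -255
    llvm::APSInt M = llvm::APSInt::getMinValue(8, /*Unsigned=*/false);
    negateAsSigned(M);  // -128 can't be negated in 8 bits; now 9 bits, +128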
 /// Try to evaluate the initializer for a variable declaration.
 ///
 /// \param Info   Information about the ongoing evaluation.
@@ -2142,6 +2306,7 @@ static bool HandleLValueComplexElement(EvalInfo &Info, const Expr *E,
 static bool evaluateVarDeclInit(EvalInfo &Info, const Expr *E,
                                 const VarDecl *VD, CallStackFrame *Frame,
                                 APValue *&Result) {
+
   // If this is a parameter to an active constexpr function call, perform
   // argument substitution.
   if (const ParmVarDecl *PVD = dyn_cast<ParmVarDecl>(VD)) {
@@ -2246,7 +2411,14 @@ static unsigned getBaseIndex(const CXXRecordDecl *Derived,
 /// Extract the value of a character from a string literal.
 static APSInt extractStringLiteralCharacter(EvalInfo &Info, const Expr *Lit,
                                             uint64_t Index) {
-  // FIXME: Support ObjCEncodeExpr, MakeStringConstant
+  // FIXME: Support MakeStringConstant
+  if (const auto *ObjCEnc = dyn_cast<ObjCEncodeExpr>(Lit)) {
+    std::string Str;
+    Info.Ctx.getObjCEncodingForType(ObjCEnc->getEncodedType(), Str);
+    assert(Index <= Str.size() && "Index too large");
+    return APSInt::getUnsigned(Str.c_str()[Index]);
+  }
+
   if (auto PE = dyn_cast<PredefinedExpr>(Lit))
     Lit = PE->getFunctionName();
   const StringLiteral *S = cast<StringLiteral>(Lit);
@@ -2821,6 +2993,9 @@ static CompleteObject findCompleteObject(EvalInfo &Info, const Expr *E,
       } else {
         Info.CCEDiag(E);
       }
+    } else if (BaseType.isConstQualified() && VD->hasDefinition(Info.Ctx)) {
+      Info.CCEDiag(E, diag::note_constexpr_ltor_non_constexpr) << VD;
+      // Keep evaluating to see what we can do.
     } else {
       // FIXME: Allow folding of values of any literal type in all languages.
       if (Info.checkingPotentialConstantExpression() &&
@@ -3065,9 +3240,9 @@ struct CompoundAssignSubobjectHandler {
       return false;
     }
 
-    int64_t Offset = getExtValue(RHS.getInt());
+    APSInt Offset = RHS.getInt();
     if (Opcode == BO_Sub)
-      Offset = -Offset;
+      negateAsSigned(Offset);
 
     LValue LVal;
     LVal.setFrom(Info.Ctx, Subobj);
@@ -4022,6 +4197,10 @@ static bool HandleFunctionCall(SourceLocation CallLoc,
       return false;
     This->moveInto(Result);
     return true;
+  } else if (MD && isLambdaCallOperator(MD)) {
+    // We're in a lambda; determine the lambda capture field maps.
+    MD->getParent()->getCaptureFields(Frame.LambdaCaptureFields,
+                                      Frame.LambdaThisCaptureField);
   }
 
   StmtResult Ret = {Result, ResultSlot};
@@ -4433,6 +4612,12 @@ class ExprEvaluatorBase
             Call.getLValueBase().dyn_cast<const ValueDecl*>());
     if (!FD)
       return Error(Callee);
+    // Don't call function pointers which have been cast to some other type.
+    // Per DR (no number yet), the caller and callee can differ in noexcept.
+    if (!Info.Ctx.hasSameFunctionTypeIgnoringExceptionSpec(
+            CalleeType->getPointeeType(), FD->getType())) {
+      return Error(E);
+    }
 
     // Overloaded operator calls to member functions are represented as normal
     // calls with '*this' as the first argument.
@@ -4448,14 +4633,42 @@ class ExprEvaluatorBase
         return false;
       This = &ThisVal;
       Args = Args.slice(1);
+    } else if (MD && MD->isLambdaStaticInvoker()) {
+      // Map the static invoker for the lambda back to the call operator.
+      // Conveniently, we don't have to slice out the 'this' argument (as is
+      // being done for the non-static case), since a static member function
+      // doesn't have an implicit argument passed in.
+      const CXXRecordDecl *ClosureClass = MD->getParent();
+      assert(
+          ClosureClass->captures_begin() == ClosureClass->captures_end() &&
+          "Number of captures must be zero for conversion to function-ptr");
+
+      const CXXMethodDecl *LambdaCallOp =
+          ClosureClass->getLambdaCallOperator();
+
+      // Set 'FD', the function that will be called below, to the call
+      // operator.  If the closure object represents a generic lambda, find
+      // the corresponding specialization of the call operator.
+ + if (ClosureClass->isGenericLambda()) { + assert(MD->isFunctionTemplateSpecialization() && + "A generic lambda's static-invoker function must be a " + "template specialization"); + const TemplateArgumentList *TAL = MD->getTemplateSpecializationArgs(); + FunctionTemplateDecl *CallOpTemplate = + LambdaCallOp->getDescribedFunctionTemplate(); + void *InsertPos = nullptr; + FunctionDecl *CorrespondingCallOpSpecialization = + CallOpTemplate->findSpecialization(TAL->asArray(), InsertPos); + assert(CorrespondingCallOpSpecialization && + "We must always have a function call operator specialization " + "that corresponds to our static invoker specialization"); + FD = cast(CorrespondingCallOpSpecialization); + } else + FD = LambdaCallOp; } - // Don't call function pointers which have been cast to some other type. - // Per DR (no number yet), the caller and callee can differ in noexcept. - if (!Info.Ctx.hasSameFunctionTypeIgnoringExceptionSpec( - CalleeType->getPointeeType(), FD->getType())) { - return Error(E); - } + } else return Error(E); @@ -4531,7 +4744,10 @@ class ExprEvaluatorBase case CK_AtomicToNonAtomic: { APValue AtomicVal; - if (!EvaluateAtomic(E->getSubExpr(), AtomicVal, Info)) + // This does not need to be done in place even for class/array types: + // atomic-to-non-atomic conversion implies copying the object + // representation. + if (!Evaluate(AtomicVal, Info, E->getSubExpr())) return false; return DerivedSuccess(AtomicVal, E); } @@ -4643,6 +4859,7 @@ class LValueExprEvaluatorBase : public ExprEvaluatorBase { protected: LValue &Result; + bool InvalidBaseOK; typedef LValueExprEvaluatorBase LValueExprEvaluatorBaseTy; typedef ExprEvaluatorBase ExprEvaluatorBaseTy; @@ -4651,9 +4868,14 @@ class LValueExprEvaluatorBase return true; } + bool evaluatePointer(const Expr *E, LValue &Result) { + return EvaluatePointer(E, Result, this->Info, InvalidBaseOK); + } + public: - LValueExprEvaluatorBase(EvalInfo &Info, LValue &Result) : - ExprEvaluatorBaseTy(Info), Result(Result) {} + LValueExprEvaluatorBase(EvalInfo &Info, LValue &Result, bool InvalidBaseOK) + : ExprEvaluatorBaseTy(Info), Result(Result), + InvalidBaseOK(InvalidBaseOK) {} bool Success(const APValue &V, const Expr *E) { Result.setFrom(this->Info.Ctx, V); @@ -4665,7 +4887,7 @@ class LValueExprEvaluatorBase QualType BaseTy; bool EvalOK; if (E->isArrow()) { - EvalOK = EvaluatePointer(E->getBase(), Result, this->Info); + EvalOK = evaluatePointer(E->getBase(), Result); BaseTy = E->getBase()->getType()->castAs()->getPointeeType(); } else if (E->getBase()->isRValue()) { assert(E->getBase()->getType()->isRecordType()); @@ -4676,7 +4898,7 @@ class LValueExprEvaluatorBase BaseTy = E->getBase()->getType(); } if (!EvalOK) { - if (!this->Info.allowInvalidBaseExpr()) + if (!InvalidBaseOK) return false; Result.setInvalid(E); return true; @@ -4770,8 +4992,8 @@ namespace { class LValueExprEvaluator : public LValueExprEvaluatorBase { public: - LValueExprEvaluator(EvalInfo &Info, LValue &Result) : - LValueExprEvaluatorBaseTy(Info, Result) {} + LValueExprEvaluator(EvalInfo &Info, LValue &Result, bool InvalidBaseOK) : + LValueExprEvaluatorBaseTy(Info, Result, InvalidBaseOK) {} bool VisitVarDecl(const Expr *E, const VarDecl *VD); bool VisitUnaryPreIncDec(const UnaryOperator *UO); @@ -4824,10 +5046,11 @@ class LValueExprEvaluator /// * function designators in C, and /// * "extern void" objects /// * @selector() expressions in Objective-C -static bool EvaluateLValue(const Expr *E, LValue &Result, EvalInfo &Info) { +static bool EvaluateLValue(const Expr *E, 
LValue &Result, EvalInfo &Info, + bool InvalidBaseOK) { assert(E->isGLValue() || E->getType()->isFunctionType() || E->getType()->isVoidType() || isa(E)); - return LValueExprEvaluator(Info, Result).Visit(E); + return LValueExprEvaluator(Info, Result, InvalidBaseOK).Visit(E); } bool LValueExprEvaluator::VisitDeclRefExpr(const DeclRefExpr *E) { @@ -4842,6 +5065,33 @@ bool LValueExprEvaluator::VisitDeclRefExpr(const DeclRefExpr *E) { bool LValueExprEvaluator::VisitVarDecl(const Expr *E, const VarDecl *VD) { + + // If we are within a lambda's call operator, check whether the 'VD' referred + // to within 'E' actually represents a lambda-capture that maps to a + // data-member/field within the closure object, and if so, evaluate to the + // field or what the field refers to. + if (Info.CurrentCall && isLambdaCallOperator(Info.CurrentCall->Callee)) { + if (auto *FD = Info.CurrentCall->LambdaCaptureFields.lookup(VD)) { + if (Info.checkingPotentialConstantExpression()) + return false; + // Start with 'Result' referring to the complete closure object... + Result = *Info.CurrentCall->This; + // ... then update it to refer to the field of the closure object + // that represents the capture. + if (!HandleLValueMember(Info, E, Result, FD)) + return false; + // And if the field is of reference type, update 'Result' to refer to what + // the field refers to. + if (FD->getType()->isReferenceType()) { + APValue RVal; + if (!handleLValueToRValueConversion(Info, E, FD->getType(), Result, + RVal)) + return false; + Result.setFrom(Info.Ctx, RVal); + } + return true; + } + } CallStackFrame *Frame = nullptr; if (VD->hasLocalStorage() && Info.CurrentCall->Index > 1) { // Only if a local variable was declared in the function currently being @@ -4988,19 +5238,18 @@ bool LValueExprEvaluator::VisitArraySubscriptExpr(const ArraySubscriptExpr *E) { if (E->getBase()->getType()->isVectorType()) return Error(E); - if (!EvaluatePointer(E->getBase(), Result, Info)) + if (!evaluatePointer(E->getBase(), Result)) return false; APSInt Index; if (!EvaluateInteger(E->getIdx(), Index, Info)) return false; - return HandleLValueArrayAdjustment(Info, E, Result, E->getType(), - getExtValue(Index)); + return HandleLValueArrayAdjustment(Info, E, Result, E->getType(), Index); } bool LValueExprEvaluator::VisitUnaryDeref(const UnaryOperator *E) { - return EvaluatePointer(E->getSubExpr(), Result, Info); + return evaluatePointer(E->getSubExpr(), Result); } bool LValueExprEvaluator::VisitUnaryReal(const UnaryOperator *E) { @@ -5079,19 +5328,130 @@ bool LValueExprEvaluator::VisitBinAssign(const BinaryOperator *E) { // Pointer Evaluation //===----------------------------------------------------------------------===// +/// \brief Attempts to compute the number of bytes available at the pointer +/// returned by a function with the alloc_size attribute. Returns true if we +/// were successful. Places an unsigned number into `Result`. +/// +/// This expects the given CallExpr to be a call to a function with an +/// alloc_size attribute. +static bool getBytesReturnedByAllocSizeCall(const ASTContext &Ctx, + const CallExpr *Call, + llvm::APInt &Result) { + const AllocSizeAttr *AllocSize = getAllocSizeAttr(Call); + + // alloc_size args are 1-indexed, 0 means not present. 
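Concretely, the 1-based indexing works like this (hypothetical declarations,
not part of the patch):

    void *my_malloc(unsigned long size)
        __attribute__((alloc_size(1)));     // ElemSizeParam = 1, no count arg
    void *my_calloc(unsigned long n, unsigned long size)
        __attribute__((alloc_size(2, 1)));  // ElemSizeParam = 2, NumElemsParam = 1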
+  assert(AllocSize && AllocSize->getElemSizeParam() != 0);
+  unsigned SizeArgNo = AllocSize->getElemSizeParam() - 1;
+  unsigned BitsInSizeT = Ctx.getTypeSize(Ctx.getSizeType());
+  if (Call->getNumArgs() <= SizeArgNo)
+    return false;
+
+  auto EvaluateAsSizeT = [&](const Expr *E, APSInt &Into) {
+    if (!E->EvaluateAsInt(Into, Ctx, Expr::SE_AllowSideEffects))
+      return false;
+    if (Into.isNegative() || !Into.isIntN(BitsInSizeT))
+      return false;
+    Into = Into.zextOrSelf(BitsInSizeT);
+    return true;
+  };
+
+  APSInt SizeOfElem;
+  if (!EvaluateAsSizeT(Call->getArg(SizeArgNo), SizeOfElem))
+    return false;
+
+  if (!AllocSize->getNumElemsParam()) {
+    Result = std::move(SizeOfElem);
+    return true;
+  }
+
+  APSInt NumberOfElems;
+  // Argument numbers start at 1
+  unsigned NumArgNo = AllocSize->getNumElemsParam() - 1;
+  if (!EvaluateAsSizeT(Call->getArg(NumArgNo), NumberOfElems))
+    return false;
+
+  bool Overflow;
+  llvm::APInt BytesAvailable = SizeOfElem.umul_ov(NumberOfElems, Overflow);
+  if (Overflow)
+    return false;
+
+  Result = std::move(BytesAvailable);
+  return true;
+}
+
+/// \brief Convenience function. LVal's base must be a call to an alloc_size
+/// function.
+static bool getBytesReturnedByAllocSizeCall(const ASTContext &Ctx,
+                                            const LValue &LVal,
+                                            llvm::APInt &Result) {
+  assert(isBaseAnAllocSizeCall(LVal.getLValueBase()) &&
+         "Can't get the size of a non alloc_size function");
+  const auto *Base = LVal.getLValueBase().get<const Expr *>();
+  const CallExpr *CE = tryUnwrapAllocSizeCall(Base);
+  return getBytesReturnedByAllocSizeCall(Ctx, CE, Result);
+}
+
+/// \brief Attempts to evaluate the given LValueBase as the result of a call to
+/// a function with the alloc_size attribute. If it was possible to do so, this
+/// function will return true, make Result's Base point to said function call,
+/// and mark Result's Base as invalid.
+static bool evaluateLValueAsAllocSize(EvalInfo &Info, APValue::LValueBase Base,
+                                      LValue &Result) {
+  if (Base.isNull())
+    return false;
+
+  // Because we do no form of static analysis, we only support const variables.
+  //
+  // Additionally, we can't support parameters, nor can we support static
+  // variables (in the latter case, use-before-assign isn't UB; in the former,
+  // we have no clue what they'll be assigned to).
+  const auto *VD =
+      dyn_cast_or_null<VarDecl>(Base.dyn_cast<const ValueDecl *>());
+  if (!VD || !VD->isLocalVarDecl() || !VD->getType().isConstQualified())
+    return false;
+
+  const Expr *Init = VD->getAnyInitializer();
+  if (!Init)
+    return false;
+
+  const Expr *E = Init->IgnoreParens();
+  if (!tryUnwrapAllocSizeCall(E))
+    return false;
+
+  // Store E instead of E unwrapped so that the type of the LValue's base is
+  // what the user wanted.
+ Result.setInvalid(E); + + QualType Pointee = E->getType()->castAs()->getPointeeType(); + Result.addUnsizedArray(Info, Pointee); + return true; +} + namespace { class PointerExprEvaluator : public ExprEvaluatorBase { LValue &Result; + bool InvalidBaseOK; bool Success(const Expr *E) { Result.set(E); return true; } + + bool evaluateLValue(const Expr *E, LValue &Result) { + return EvaluateLValue(E, Result, Info, InvalidBaseOK); + } + + bool evaluatePointer(const Expr *E, LValue &Result) { + return EvaluatePointer(E, Result, Info, InvalidBaseOK); + } + + bool visitNonBuiltinCallExpr(const CallExpr *E); public: - PointerExprEvaluator(EvalInfo &info, LValue &Result) - : ExprEvaluatorBaseTy(info), Result(Result) {} + PointerExprEvaluator(EvalInfo &info, LValue &Result, bool InvalidBaseOK) + : ExprEvaluatorBaseTy(info), Result(Result), + InvalidBaseOK(InvalidBaseOK) {} bool Success(const APValue &V, const Expr *E) { Result.setFrom(Info.Ctx, V); @@ -5131,6 +5491,27 @@ class PointerExprEvaluator return false; } Result = *Info.CurrentCall->This; + // If we are inside a lambda's call operator, the 'this' expression refers + // to the enclosing '*this' object (either by value or reference) which is + // either copied into the closure object's field that represents the '*this' + // or refers to '*this'. + if (isLambdaCallOperator(Info.CurrentCall->Callee)) { + // Update 'Result' to refer to the data member/field of the closure object + // that represents the '*this' capture. + if (!HandleLValueMember(Info, E, Result, + Info.CurrentCall->LambdaThisCaptureField)) + return false; + // If we captured '*this' by reference, replace the field with its referent. + if (Info.CurrentCall->LambdaThisCaptureField->getType() + ->isPointerType()) { + APValue RVal; + if (!handleLValueToRValueConversion(Info, E, E->getType(), Result, + RVal)) + return false; + + Result.setFrom(Info.Ctx, RVal); + } + } return true; } @@ -5138,9 +5519,10 @@ class PointerExprEvaluator }; } // end anonymous namespace -static bool EvaluatePointer(const Expr* E, LValue& Result, EvalInfo &Info) { +static bool EvaluatePointer(const Expr* E, LValue& Result, EvalInfo &Info, + bool InvalidBaseOK) { assert(E->isRValue() && E->getType()->hasPointerRepresentation()); - return PointerExprEvaluator(Info, Result).Visit(E); + return PointerExprEvaluator(Info, Result, InvalidBaseOK).Visit(E); } bool PointerExprEvaluator::VisitBinaryOperator(const BinaryOperator *E) { @@ -5153,7 +5535,7 @@ bool PointerExprEvaluator::VisitBinaryOperator(const BinaryOperator *E) { if (IExp->getType()->isPointerType()) std::swap(PExp, IExp); - bool EvalPtrOK = EvaluatePointer(PExp, Result, Info); + bool EvalPtrOK = evaluatePointer(PExp, Result); if (!EvalPtrOK && !Info.noteFailure()) return false; @@ -5161,17 +5543,15 @@ bool PointerExprEvaluator::VisitBinaryOperator(const BinaryOperator *E) { if (!EvaluateInteger(IExp, Offset, Info) || !EvalPtrOK) return false; - int64_t AdditionalOffset = getExtValue(Offset); if (E->getOpcode() == BO_Sub) - AdditionalOffset = -AdditionalOffset; + negateAsSigned(Offset); QualType Pointee = PExp->getType()->castAs()->getPointeeType(); - return HandleLValueArrayAdjustment(Info, E, Result, Pointee, - AdditionalOffset); + return HandleLValueArrayAdjustment(Info, E, Result, Pointee, Offset); } bool PointerExprEvaluator::VisitUnaryAddrOf(const UnaryOperator *E) { - return EvaluateLValue(E->getSubExpr(), Result, Info); + return evaluateLValue(E->getSubExpr(), Result); } bool PointerExprEvaluator::VisitCastExpr(const CastExpr* E) { @@ -5205,7 
+5585,7 @@ bool PointerExprEvaluator::VisitCastExpr(const CastExpr* E) { case CK_DerivedToBase: case CK_UncheckedDerivedToBase: - if (!EvaluatePointer(E->getSubExpr(), Result, Info)) + if (!evaluatePointer(E->getSubExpr(), Result)) return false; if (!Result.Base && Result.Offset.isZero()) return true; @@ -5252,7 +5632,7 @@ bool PointerExprEvaluator::VisitCastExpr(const CastExpr* E) { } case CK_ArrayToPointerDecay: if (SubExpr->isGLValue()) { - if (!EvaluateLValue(SubExpr, Result, Info)) + if (!evaluateLValue(SubExpr, Result)) return false; } else { Result.set(SubExpr, Info.CurrentCall->Index); @@ -5269,7 +5649,21 @@ bool PointerExprEvaluator::VisitCastExpr(const CastExpr* E) { return true; case CK_FunctionToPointerDecay: - return EvaluateLValue(SubExpr, Result, Info); + return evaluateLValue(SubExpr, Result); + + case CK_LValueToRValue: { + LValue LVal; + if (!evaluateLValue(E->getSubExpr(), LVal)) + return false; + + APValue RVal; + // Note, we use the subexpression's type in order to retain cv-qualifiers. + if (!handleLValueToRValueConversion(Info, E, E->getSubExpr()->getType(), + LVal, RVal)) + return InvalidBaseOK && + evaluateLValueAsAllocSize(Info, LVal.Base, Result); + return Success(RVal, E); + } } return ExprEvaluatorBaseTy::VisitCastExpr(E); @@ -5283,6 +5677,8 @@ static CharUnits GetAlignOfType(EvalInfo &Info, QualType T) { T = Ref->getPointeeType(); // __alignof is defined to return the preferred alignment. + if (T.getQualifiers().hasUnaligned()) + return CharUnits::One(); return Info.Ctx.toCharUnitsFromBits( Info.Ctx.getPreferredTypeAlign(T.getTypePtr())); } @@ -5307,6 +5703,20 @@ static CharUnits GetAlignOfExpr(EvalInfo &Info, const Expr *E) { return GetAlignOfType(Info, E->getType()); } +// To be clear: this happily visits unsupported builtins. Better name welcomed. +bool PointerExprEvaluator::visitNonBuiltinCallExpr(const CallExpr *E) { + if (ExprEvaluatorBaseTy::VisitCallExpr(E)) + return true; + + if (!(InvalidBaseOK && getAllocSizeAttr(E))) + return false; + + Result.setInvalid(E); + QualType PointeeTy = E->getType()->castAs()->getPointeeType(); + Result.addUnsizedArray(Info, PointeeTy); + return true; +} + bool PointerExprEvaluator::VisitCallExpr(const CallExpr *E) { if (IsStringLiteralCall(E)) return Success(E); @@ -5314,33 +5724,33 @@ bool PointerExprEvaluator::VisitCallExpr(const CallExpr *E) { if (unsigned BuiltinOp = E->getBuiltinCallee()) return VisitBuiltinCallExpr(E, BuiltinOp); - return ExprEvaluatorBaseTy::VisitCallExpr(E); + return visitNonBuiltinCallExpr(E); } bool PointerExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, unsigned BuiltinOp) { switch (BuiltinOp) { case Builtin::BI__builtin_addressof: - return EvaluateLValue(E->getArg(0), Result, Info); + return evaluateLValue(E->getArg(0), Result); case Builtin::BI__builtin_assume_aligned: { // We need to be very careful here because: if the pointer does not have the // asserted alignment, then the behavior is undefined, and undefined // behavior is non-constant. 
- if (!EvaluatePointer(E->getArg(0), Result, Info)) + if (!evaluatePointer(E->getArg(0), Result)) return false; LValue OffsetResult(Result); APSInt Alignment; if (!EvaluateInteger(E->getArg(1), Alignment, Info)) return false; - CharUnits Align = CharUnits::fromQuantity(getExtValue(Alignment)); + CharUnits Align = CharUnits::fromQuantity(Alignment.getZExtValue()); if (E->getNumArgs() > 2) { APSInt Offset; if (!EvaluateInteger(E->getArg(2), Offset, Info)) return false; - int64_t AdditionalOffset = -getExtValue(Offset); + int64_t AdditionalOffset = -Offset.getZExtValue(); OffsetResult.Offset += CharUnits::fromQuantity(AdditionalOffset); } @@ -5357,12 +5767,11 @@ bool PointerExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, if (BaseAlignment < Align) { Result.Designator.setInvalid(); - // FIXME: Quantities here cast to integers because the plural modifier - // does not work on APSInts yet. + // FIXME: Add support to Diagnostic for long / long long. CCEDiag(E->getArg(0), diag::note_constexpr_baa_insufficient_alignment) << 0 - << (int) BaseAlignment.getQuantity() - << (unsigned) getExtValue(Alignment); + << (unsigned)BaseAlignment.getQuantity() + << (unsigned)Align.getQuantity(); return false; } } @@ -5370,18 +5779,14 @@ bool PointerExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, // The offset must also have the correct alignment. if (OffsetResult.Offset.alignTo(Align) != OffsetResult.Offset) { Result.Designator.setInvalid(); - APSInt Offset(64, false); - Offset = OffsetResult.Offset.getQuantity(); - - if (OffsetResult.Base) - CCEDiag(E->getArg(0), - diag::note_constexpr_baa_insufficient_alignment) << 1 - << (int) getExtValue(Offset) << (unsigned) getExtValue(Alignment); - else - CCEDiag(E->getArg(0), - diag::note_constexpr_baa_value_insufficient_alignment) - << Offset << (unsigned) getExtValue(Alignment); + (OffsetResult.Base + ? CCEDiag(E->getArg(0), + diag::note_constexpr_baa_insufficient_alignment) << 1 + : CCEDiag(E->getArg(0), + diag::note_constexpr_baa_value_insufficient_alignment)) + << (int)OffsetResult.Offset.getQuantity() + << (unsigned)Align.getQuantity(); return false; } @@ -5402,6 +5807,7 @@ bool PointerExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, case Builtin::BI__builtin_strchr: case Builtin::BI__builtin_wcschr: case Builtin::BI__builtin_memchr: + case Builtin::BI__builtin_char_memchr: case Builtin::BI__builtin_wmemchr: { if (!Visit(E->getArg(0))) return false; @@ -5439,6 +5845,7 @@ bool PointerExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, // Fall through. case Builtin::BImemchr: case Builtin::BI__builtin_memchr: + case Builtin::BI__builtin_char_memchr: // memchr compares by converting both sides to unsigned char. That's also // correct for strchr if we get this far (to cope with plain char being // unsigned in the strchr case). 
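The BI__builtin_char_memchr case added above is the constant-evaluable flavor
of memchr (added for std::char_traits<char>::find); a user-level sketch,
assuming a compiler carrying this patch:

    constexpr const char *s = "abcde";
    static_assert(__builtin_char_memchr(s, 'c', 5) == s + 2, "found at offset 2");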
@@ -5473,7 +5880,7 @@ bool PointerExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, } default: - return ExprEvaluatorBaseTy::VisitCallExpr(E); + return visitNonBuiltinCallExpr(E); } } @@ -5596,6 +6003,7 @@ namespace { bool VisitCXXConstructExpr(const CXXConstructExpr *E) { return VisitCXXConstructExpr(E, E->getType()); } + bool VisitLambdaExpr(const LambdaExpr *E); bool VisitCXXInheritedCtorInitExpr(const CXXInheritedCtorInitExpr *E); bool VisitCXXConstructExpr(const CXXConstructExpr *E, QualType T); bool VisitCXXStdInitializerListExpr(const CXXStdInitializerListExpr *E); @@ -5930,6 +6338,47 @@ bool RecordExprEvaluator::VisitCXXStdInitializerListExpr( return true; } +bool RecordExprEvaluator::VisitLambdaExpr(const LambdaExpr *E) { + const CXXRecordDecl *ClosureClass = E->getLambdaClass(); + if (ClosureClass->isInvalidDecl()) return false; + + if (Info.checkingPotentialConstantExpression()) return true; + + const size_t NumFields = + std::distance(ClosureClass->field_begin(), ClosureClass->field_end()); + + assert(NumFields == (size_t)std::distance(E->capture_init_begin(), + E->capture_init_end()) && + "The number of lambda capture initializers should equal the number of " + "fields within the closure type"); + + Result = APValue(APValue::UninitStruct(), /*NumBases*/0, NumFields); + // Iterate through all the lambda's closure object's fields and initialize + // them. + auto *CaptureInitIt = E->capture_init_begin(); + const LambdaCapture *CaptureIt = ClosureClass->captures_begin(); + bool Success = true; + for (const auto *Field : ClosureClass->fields()) { + assert(CaptureInitIt != E->capture_init_end()); + // Get the initializer for this field + Expr *const CurFieldInit = *CaptureInitIt++; + + // If there is no initializer, either this is a VLA or an error has + // occurred. + if (!CurFieldInit) + return Error(E); + + APValue &FieldVal = Result.getStructField(Field->getFieldIndex()); + if (!EvaluateInPlace(FieldVal, Info, This, CurFieldInit)) { + if (!Info.keepEvaluatingAfterFailure()) + return false; + Success = false; + } + ++CaptureIt; + } + return Success; +} + static bool EvaluateRecord(const Expr *E, const LValue &This, APValue &Result, EvalInfo &Info) { assert(E->isRValue() && E->getType()->isRecordType() && @@ -5949,7 +6398,7 @@ class TemporaryExprEvaluator : public LValueExprEvaluatorBase { public: TemporaryExprEvaluator(EvalInfo &Info, LValue &Result) : - LValueExprEvaluatorBaseTy(Info, Result) {} + LValueExprEvaluatorBaseTy(Info, Result, false) {} /// Visit an expression which constructs the value of this temporary. 
 bool VisitConstructExpr(const Expr *E) {
@@ -5979,6 +6428,9 @@ class TemporaryExprEvaluator
   bool VisitCXXStdInitializerListExpr(const CXXStdInitializerListExpr *E) {
     return VisitConstructExpr(E);
   }
+  bool VisitLambdaExpr(const LambdaExpr *E) {
+    return VisitConstructExpr(E);
+  }
 };
 } // end anonymous namespace
 
@@ -6512,8 +6964,6 @@ class IntExprEvaluator
   bool VisitCXXNoexceptExpr(const CXXNoexceptExpr *E);
   bool VisitSizeOfPackExpr(const SizeOfPackExpr *E);
 
-private:
-  bool TryEvaluateBuiltinObjectSize(const CallExpr *E, unsigned Type);
   // FIXME: Missing: array subscript of vector, member of vector
 };
 } // end anonymous namespace
@@ -6646,7 +7096,6 @@ static int EvaluateBuiltinClassifyType(const CallExpr *E,
     case BuiltinType::OCLEvent:
     case BuiltinType::OCLClkEvent:
     case BuiltinType::OCLQueue:
-    case BuiltinType::OCLNDRange:
     case BuiltinType::OCLReserveID:
     case BuiltinType::Dependent:
       llvm_unreachable("CallExpr::isBuiltinClassifyType(): unimplemented type");
@@ -6705,6 +7154,7 @@ static int EvaluateBuiltinClassifyType(const CallExpr *E,
   case Type::Vector:
   case Type::ExtVector:
   case Type::Auto:
+  case Type::DeducedTemplateSpecialization:
   case Type::ObjCObject:
   case Type::ObjCInterface:
   case Type::ObjCObjectPointer:
@@ -6786,7 +7236,7 @@ static QualType getObjectType(APValue::LValueBase B) {
 }
 
 /// A more selective version of E->IgnoreParenCasts for
-/// TryEvaluateBuiltinObjectSize. This ignores some casts/parens that serve only
+/// tryEvaluateBuiltinObjectSize. This ignores some casts/parens that serve only
 /// to change the type of E.
 /// Ex. For E = `(short*)((char*)(&foo))`, returns `&foo`
 ///
@@ -6853,82 +7303,197 @@ static bool isDesignatorAtObjectEnd(const ASTContext &Ctx, const LValue &LVal) {
     }
   }
 
+  unsigned I = 0;
   QualType BaseType = getType(Base);
-  for (int I = 0, E = LVal.Designator.Entries.size(); I != E; ++I) {
+  if (LVal.Designator.FirstEntryIsAnUnsizedArray) {
+    assert(isBaseAnAllocSizeCall(Base) &&
+           "Unsized array in non-alloc_size call?");
+    // If this is an alloc_size base, we should ignore the initial array index
+    ++I;
+    BaseType = BaseType->castAs<PointerType>()->getPointeeType();
+  }
+
+  for (unsigned E = LVal.Designator.Entries.size(); I != E; ++I) {
+    const auto &Entry = LVal.Designator.Entries[I];
     if (BaseType->isArrayType()) {
       // Because __builtin_object_size treats arrays as objects, we can ignore
       // the index iff this is the last array in the Designator.
       if (I + 1 == E)
         return true;
-      auto *CAT = cast<ConstantArrayType>(Ctx.getAsArrayType(BaseType));
-      uint64_t Index = LVal.Designator.Entries[I].ArrayIndex;
+      const auto *CAT = cast<ConstantArrayType>(Ctx.getAsArrayType(BaseType));
+      uint64_t Index = Entry.ArrayIndex;
       if (Index + 1 != CAT->getSize())
         return false;
       BaseType = CAT->getElementType();
     } else if (BaseType->isAnyComplexType()) {
-      auto *CT = BaseType->castAs<ComplexType>();
-      uint64_t Index = LVal.Designator.Entries[I].ArrayIndex;
+      const auto *CT = BaseType->castAs<ComplexType>();
+      uint64_t Index = Entry.ArrayIndex;
       if (Index != 1)
         return false;
       BaseType = CT->getElementType();
-    } else if (auto *FD = getAsField(LVal.Designator.Entries[I])) {
+    } else if (auto *FD = getAsField(Entry)) {
       bool Invalid;
       if (!IsLastOrInvalidFieldDecl(FD, Invalid))
         return Invalid;
       BaseType = FD->getType();
     } else {
-      assert(getAsBaseClass(LVal.Designator.Entries[I]) != nullptr &&
-             "Expecting cast to a base class");
+      assert(getAsBaseClass(Entry) && "Expecting cast to a base class");
      return false;
    }
  }
  return true;
 }
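A concrete reading of the rules isDesignatorAtObjectEnd() just enforced
(sketch, not part of the patch):

    struct A { int n; char tail[4]; };
    // Given 'extern struct A *a;':
    //   &a->tail[3]  is at the object's end: last field, final array index
    //   &a->n        is not: 'n' is not the last field of A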
 
-/// Tests to see if the LValue has a designator (that isn't necessarily valid).
+/// Tests to see if the LValue has a user-specified designator (that isn't
+/// necessarily valid). Note that this always returns 'true' if the LValue has
+/// an unsized array as its first designator entry, because there's currently
+/// no way to tell if the user typed *foo or foo[0].
 static bool refersToCompleteObject(const LValue &LVal) {
-  if (LVal.Designator.Invalid || !LVal.Designator.Entries.empty())
+  if (LVal.Designator.Invalid)
     return false;
 
+  if (!LVal.Designator.Entries.empty())
+    return LVal.Designator.isMostDerivedAnUnsizedArray();
+
   if (!LVal.InvalidBase)
     return true;
 
-  auto *E = LVal.Base.dyn_cast<const Expr *>();
-  (void)E;
-  assert(E != nullptr && isa<MemberExpr>(E));
-  return false;
+  // If `E` is a MemberExpr, then the first part of the designator is hiding in
+  // the LValueBase.
+  const auto *E = LVal.Base.dyn_cast<const Expr *>();
+  return !E || !isa<MemberExpr>(E);
+}
+
+/// Attempts to detect a user writing into a piece of memory that's impossible
+/// to figure out the size of by just using types.
+static bool isUserWritingOffTheEnd(const ASTContext &Ctx, const LValue &LVal) {
+  const SubobjectDesignator &Designator = LVal.Designator;
+  // Notes:
+  // - Users can only write off of the end when we have an invalid base. Invalid
+  //   bases imply we don't know where the memory came from.
+  // - We used to be a bit more aggressive here; we'd only be conservative if
+  //   the array at the end was flexible, or if it had 0 or 1 elements. This
+  //   broke some common standard library extensions (PR30346), but was
+  //   otherwise seemingly fine. It may be useful to reintroduce this behavior
+  //   with some sort of whitelist. OTOH, it seems that GCC is always
+  //   conservative with the last element in structs (if it's an array), so our
+  //   current behavior is more compatible than a whitelisting approach would
+  //   be.
+  return LVal.InvalidBase &&
+         Designator.Entries.size() == Designator.MostDerivedPathLength &&
+         Designator.MostDerivedIsArrayElement &&
+         isDesignatorAtObjectEnd(Ctx, LVal);
+}
+
+/// Converts the given APInt to CharUnits, assuming the APInt is unsigned.
+/// Fails if the conversion would cause loss of precision.
+static bool convertUnsignedAPIntToCharUnits(const llvm::APInt &Int,
+                                            CharUnits &Result) {
+  auto CharUnitsMax = std::numeric_limits<CharUnits::QuantityType>::max();
+  if (Int.ugt(CharUnitsMax))
+    return false;
+  Result = CharUnits::fromQuantity(Int.getZExtValue());
+  return true;
+}
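For reference, the __builtin_object_size 'Type' encoding that
determineEndOffset() below dispatches on: bit 0 asks for the closest enclosing
subobject, bit 1 asks for a minimum rather than a maximum. Worked example
(illustrative):

    struct X { char small[4]; char rest[12]; } x;
    // __builtin_object_size(&x.small[1], 0) == 15  // bytes to the end of x
    // __builtin_object_size(&x.small[1], 1) == 3   // bytes to the end of x.small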
 
-/// Tries to evaluate the __builtin_object_size for @p E. If successful, returns
-/// true and stores the result in @p Size.
+/// Helper for tryEvaluateBuiltinObjectSize -- Given an LValue, this will
+/// determine how many bytes exist from the beginning of the object to either
+/// the end of the current subobject, or the end of the object itself, depending
+/// on what the LValue looks like + the value of Type.
 ///
-/// If @p WasError is non-null, this will report whether the failure to evaluate
-/// is to be treated as an Error in IntExprEvaluator.
-static bool tryEvaluateBuiltinObjectSize(const Expr *E, unsigned Type,
-                                         EvalInfo &Info, uint64_t &Size,
-                                         bool *WasError = nullptr) {
-  if (WasError != nullptr)
-    *WasError = false;
-
-  auto Error = [&](const Expr *E) {
-    if (WasError != nullptr)
-      *WasError = true;
-    return false;
-  };
-
-  auto Success = [&](uint64_t S, const Expr *E) {
-    Size = S;
-    return true;
-  };
+/// If this returns false, the value of Result is undefined.
+static bool determineEndOffset(EvalInfo &Info, SourceLocation ExprLoc,
+                               unsigned Type, const LValue &LVal,
+                               CharUnits &EndOffset) {
+  bool DetermineForCompleteObject = refersToCompleteObject(LVal);
+
+  auto CheckedHandleSizeof = [&](QualType Ty, CharUnits &Result) {
+    if (Ty.isNull() || Ty->isIncompleteType() || Ty->isFunctionType())
+      return false;
+    return HandleSizeof(Info, ExprLoc, Ty, Result);
+  };
+
+  // We want to evaluate the size of the entire object. This is a valid fallback
+  // for when Type=1 and the designator is invalid, because we're asked for an
+  // upper-bound.
+  if (!(Type & 1) || LVal.Designator.Invalid || DetermineForCompleteObject) {
+    // Type=3 wants a lower bound, so we can't fall back to this.
+    if (Type == 3 && !DetermineForCompleteObject)
+      return false;
+
+    llvm::APInt APEndOffset;
+    if (isBaseAnAllocSizeCall(LVal.getLValueBase()) &&
+        getBytesReturnedByAllocSizeCall(Info.Ctx, LVal, APEndOffset))
+      return convertUnsignedAPIntToCharUnits(APEndOffset, EndOffset);
+
+    if (LVal.InvalidBase)
+      return false;
+
+    QualType BaseTy = getObjectType(LVal.getLValueBase());
+    return CheckedHandleSizeof(BaseTy, EndOffset);
+  }
+
+  // We want to evaluate the size of a subobject.
+  const SubobjectDesignator &Designator = LVal.Designator;
+
+  // The following is a moderately common idiom in C:
+  //
+  // struct Foo { int a; char c[1]; };
+  // struct Foo *F = (struct Foo *)malloc(sizeof(struct Foo) + strlen(Bar));
+  // strcpy(&F->c[0], Bar);
+  //
+  // In order to not break too much legacy code, we need to support it.
+  if (isUserWritingOffTheEnd(Info.Ctx, LVal)) {
+    // If we can resolve this to an alloc_size call, we can hand that back,
+    // because we know for certain how many bytes there are to write to.
+    llvm::APInt APEndOffset;
+    if (isBaseAnAllocSizeCall(LVal.getLValueBase()) &&
+        getBytesReturnedByAllocSizeCall(Info.Ctx, LVal, APEndOffset))
+      return convertUnsignedAPIntToCharUnits(APEndOffset, EndOffset);
+
+    // If we cannot determine the size of the initial allocation, then we can't
+    // give an accurate upper-bound. However, we are still able to give
+    // conservative lower-bounds for Type=3.
+    if (Type == 1)
+      return false;
+  }
+
+  CharUnits BytesPerElem;
+  if (!CheckedHandleSizeof(Designator.MostDerivedType, BytesPerElem))
+    return false;
+
+  // According to the GCC documentation, we want the size of the subobject
+  // denoted by the pointer. But that's not quite right -- what we actually
+  // want is the size of the immediately-enclosing array, if there is one.
+  int64_t ElemsRemaining;
+  if (Designator.MostDerivedIsArrayElement &&
+      Designator.Entries.size() == Designator.MostDerivedPathLength) {
+    uint64_t ArraySize = Designator.getMostDerivedArraySize();
+    uint64_t ArrayIndex = Designator.Entries.back().ArrayIndex;
+    ElemsRemaining = ArraySize <= ArrayIndex ? 0 : ArraySize - ArrayIndex;
+  } else {
+    ElemsRemaining = Designator.isOnePastTheEnd() ? 0 : 1;
+  }
+
+  EndOffset = LVal.getLValueOffset() + BytesPerElem * ElemsRemaining;
+  return true;
+}
+
+/// \brief Tries to evaluate the __builtin_object_size for @p E. If successful,
+/// returns true and stores the result in @p Size.
+static bool tryEvaluateBuiltinObjectSize(const Expr *E, unsigned Type,
+                                         EvalInfo &Info, uint64_t &Size) {
   // Determine the denoted object.
- LValue Base; + LValue LVal; { // The operand of __builtin_object_size is never evaluated for side-effects. // If there are any, but we can determine the pointed-to object anyway, then // ignore the side-effects. SpeculativeEvaluationRAII SpeculativeEval(Info); - FoldOffsetRAII Fold(Info, Type & 1); + FoldOffsetRAII Fold(Info); if (E->isGLValue()) { // It's possible for us to be given GLValues if we're called via @@ -6936,122 +7501,30 @@ static bool tryEvaluateBuiltinObjectSize(const Expr *E, unsigned Type, APValue RVal; if (!EvaluateAsRValue(Info, E, RVal)) return false; - Base.setFrom(Info.Ctx, RVal); - } else if (!EvaluatePointer(ignorePointerCastsAndParens(E), Base, Info)) + LVal.setFrom(Info.Ctx, RVal); + } else if (!EvaluatePointer(ignorePointerCastsAndParens(E), LVal, Info, + /*InvalidBaseOK=*/true)) return false; } - CharUnits BaseOffset = Base.getLValueOffset(); // If we point to before the start of the object, there are no accessible // bytes. - if (BaseOffset.isNegative()) - return Success(0, E); - - // In the case where we're not dealing with a subobject, we discard the - // subobject bit. - bool SubobjectOnly = (Type & 1) != 0 && !refersToCompleteObject(Base); - - // If Type & 1 is 0, we need to be able to statically guarantee that the bytes - // exist. If we can't verify the base, then we can't do that. - // - // As a special case, we produce a valid object size for an unknown object - // with a known designator if Type & 1 is 1. For instance: - // - // extern struct X { char buff[32]; int a, b, c; } *p; - // int a = __builtin_object_size(p->buff + 4, 3); // returns 28 - // int b = __builtin_object_size(p->buff + 4, 2); // returns 0, not 40 - // - // This matches GCC's behavior. - if (Base.InvalidBase && !SubobjectOnly) - return Error(E); - - // If we're not examining only the subobject, then we reset to a complete - // object designator - // - // If Type is 1 and we've lost track of the subobject, just find the complete - // object instead. (If Type is 3, that's not correct behavior and we should - // return 0 instead.) - LValue End = Base; - if (!SubobjectOnly || (End.Designator.Invalid && Type == 1)) { - QualType T = getObjectType(End.getLValueBase()); - if (T.isNull()) - End.Designator.setInvalid(); - else { - End.Designator = SubobjectDesignator(T); - End.Offset = CharUnits::Zero(); - } + if (LVal.getLValueOffset().isNegative()) { + Size = 0; + return true; } - // If it is not possible to determine which objects ptr points to at compile - // time, __builtin_object_size should return (size_t) -1 for type 0 or 1 - // and (size_t) 0 for type 2 or 3. - if (End.Designator.Invalid) + CharUnits EndOffset; + if (!determineEndOffset(Info, E->getExprLoc(), Type, LVal, EndOffset)) return false; - // According to the GCC documentation, we want the size of the subobject - // denoted by the pointer. But that's not quite right -- what we actually - // want is the size of the immediately-enclosing array, if there is one. - int64_t AmountToAdd = 1; - if (End.Designator.MostDerivedIsArrayElement && - End.Designator.Entries.size() == End.Designator.MostDerivedPathLength) { - // We got a pointer to an array. Step to its end. - AmountToAdd = End.Designator.MostDerivedArraySize - - End.Designator.Entries.back().ArrayIndex; - } else if (End.Designator.isOnePastTheEnd()) { - // We're already pointing at the end of the object. 
- AmountToAdd = 0; - } - - QualType PointeeType = End.Designator.MostDerivedType; - assert(!PointeeType.isNull()); - if (PointeeType->isIncompleteType() || PointeeType->isFunctionType()) - return Error(E); - - if (!HandleLValueArrayAdjustment(Info, E, End, End.Designator.MostDerivedType, - AmountToAdd)) - return false; - - auto EndOffset = End.getLValueOffset(); - - // The following is a moderately common idiom in C: - // - // struct Foo { int a; char c[1]; }; - // struct Foo *F = (struct Foo *)malloc(sizeof(struct Foo) + strlen(Bar)); - // strcpy(&F->c[0], Bar); - // - // So, if we see that we're examining an array at the end of a struct with an - // unknown base, we give up instead of breaking code that behaves this way. - // Note that we only do this when Type=1, because Type=3 is a lower bound, so - // answering conservatively is fine. - // - // We used to be a bit more aggressive here; we'd only be conservative if the - // array at the end was flexible, or if it had 0 or 1 elements. This broke - // some common standard library extensions (PR30346), but was otherwise - // seemingly fine. It may be useful to reintroduce this behavior with some - // sort of whitelist. OTOH, it seems that GCC is always conservative with the - // last element in structs (if it's an array), so our current behavior is more - // compatible than a whitelisting approach would be. - if (End.InvalidBase && SubobjectOnly && Type == 1 && - End.Designator.Entries.size() == End.Designator.MostDerivedPathLength && - End.Designator.MostDerivedIsArrayElement && - isDesignatorAtObjectEnd(Info.Ctx, End)) - return false; - - if (BaseOffset > EndOffset) - return Success(0, E); - - return Success((EndOffset - BaseOffset).getQuantity(), E); -} - -bool IntExprEvaluator::TryEvaluateBuiltinObjectSize(const CallExpr *E, - unsigned Type) { - uint64_t Size; - bool WasError; - if (::tryEvaluateBuiltinObjectSize(E->getArg(0), Type, Info, Size, &WasError)) - return Success(Size, E); - if (WasError) - return Error(E); - return false; + // If we've fallen outside of the end offset, just pretend there's nothing to + // write to/read from. + if (EndOffset <= LVal.getLValueOffset()) + Size = 0; + else + Size = (EndOffset - LVal.getLValueOffset()).getQuantity(); + return true; } bool IntExprEvaluator::VisitCallExpr(const CallExpr *E) { @@ -7073,8 +7546,9 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, E->getArg(1)->EvaluateKnownConstInt(Info.Ctx).getZExtValue(); assert(Type <= 3 && "unexpected type"); - if (TryEvaluateBuiltinObjectSize(E, Type)) - return true; + uint64_t Size; + if (tryEvaluateBuiltinObjectSize(E->getArg(0), Type, Info, Size)) + return Success(Size, E); if (E->getArg(0)->HasSideEffects(Info.Ctx)) return Success((Type & 2) ? 0 : -1, E); @@ -7087,7 +7561,7 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, case EvalInfo::EM_ConstantFold: case EvalInfo::EM_EvaluateForOverflow: case EvalInfo::EM_IgnoreSideEffects: - case EvalInfo::EM_DesignatorFold: + case EvalInfo::EM_OffsetFold: // Leave it to IR generation. return Error(E); case EvalInfo::EM_ConstantExpressionUnevaluated: @@ -7599,6 +8073,18 @@ bool DataRecursiveIntBinOpEvaluator:: return true; } +static void addOrSubLValueAsInteger(APValue &LVal, APSInt Index, bool IsSub) { + // Compute the new offset in the appropriate width, wrapping at 64 bits. + // FIXME: When compiling for a 32-bit target, we should use 32-bit + // offsets. 
+  assert(!LVal.hasLValuePath() && "have designator for integer lvalue");
+  CharUnits &Offset = LVal.getLValueOffset();
+  uint64_t Offset64 = Offset.getQuantity();
+  uint64_t Index64 = Index.extOrTrunc(64).getZExtValue();
+  Offset = CharUnits::fromQuantity(IsSub ? Offset64 - Index64
+                                         : Offset64 + Index64);
+}
+
 bool DataRecursiveIntBinOpEvaluator::
        VisitBinOp(const EvalResult &LHSResult, const EvalResult &RHSResult,
                   const BinaryOperator *E, APValue &Result) {
@@ -7645,12 +8131,7 @@ bool DataRecursiveIntBinOpEvaluator::
   // Handle cases like (unsigned long)&a + 4.
   if (E->isAdditiveOp() && LHSVal.isLValue() && RHSVal.isInt()) {
     Result = LHSVal;
-    CharUnits AdditionalOffset =
-        CharUnits::fromQuantity(RHSVal.getInt().getZExtValue());
-    if (E->getOpcode() == BO_Add)
-      Result.getLValueOffset() += AdditionalOffset;
-    else
-      Result.getLValueOffset() -= AdditionalOffset;
+    addOrSubLValueAsInteger(Result, RHSVal.getInt(), E->getOpcode() == BO_Sub);
     return true;
   }
 
@@ -7658,8 +8139,7 @@ bool DataRecursiveIntBinOpEvaluator::
   if (E->getOpcode() == BO_Add && RHSVal.isLValue() && LHSVal.isInt()) {
     Result = RHSVal;
-    Result.getLValueOffset() +=
-        CharUnits::fromQuantity(LHSVal.getInt().getZExtValue());
+    addOrSubLValueAsInteger(Result, LHSVal.getInt(), /*IsSub*/false);
     return true;
   }
 
@@ -8341,6 +8821,7 @@ bool IntExprEvaluator::VisitCastExpr(const CastExpr *E) {
   case CK_IntegralComplexToFloatingComplex:
   case CK_BuiltinFnToFnPtr:
   case CK_ZeroToOCLEvent:
+  case CK_ZeroToOCLQueue:
   case CK_NonAtomicToAtomic:
   case CK_AddressSpaceConversion:
   case CK_IntToOCLSampler:
@@ -8838,6 +9319,7 @@ bool ComplexExprEvaluator::VisitCastExpr(const CastExpr *E) {
   case CK_CopyAndAutoreleaseBlockObject:
   case CK_BuiltinFnToFnPtr:
   case CK_ZeroToOCLEvent:
+  case CK_ZeroToOCLQueue:
   case CK_NonAtomicToAtomic:
   case CK_AddressSpaceConversion:
   case CK_IntToOCLSampler:
@@ -9214,10 +9696,11 @@ bool ComplexExprEvaluator::VisitInitListExpr(const InitListExpr *E) {
 namespace {
 class AtomicExprEvaluator :
     public ExprEvaluatorBase<AtomicExprEvaluator> {
+  const LValue *This;
   APValue &Result;
 public:
-  AtomicExprEvaluator(EvalInfo &Info, APValue &Result)
-      : ExprEvaluatorBaseTy(Info), Result(Result) {}
+  AtomicExprEvaluator(EvalInfo &Info, const LValue *This, APValue &Result)
+      : ExprEvaluatorBaseTy(Info), This(This), Result(Result) {}
 
   bool Success(const APValue &V, const Expr *E) {
     Result = V;
@@ -9227,7 +9710,10 @@ class AtomicExprEvaluator :
   bool ZeroInitialization(const Expr *E) {
     ImplicitValueInitExpr VIE(
         E->getType()->castAs<AtomicType>()->getValueType());
-    return Evaluate(Result, Info, &VIE);
+    // For atomic-qualified class (and array) types in C++, initialize the
+    // _Atomic-wrapped subobject directly, in-place.
+    return This ? EvaluateInPlace(Result, Info, *This, &VIE)
+                : Evaluate(Result, Info, &VIE);
   }
 
   bool VisitCastExpr(const CastExpr *E) {
@@ -9235,15 +9721,17 @@ class AtomicExprEvaluator :
     default:
       return ExprEvaluatorBaseTy::VisitCastExpr(E);
     case CK_NonAtomicToAtomic:
-      return Evaluate(Result, Info, E->getSubExpr());
+      return This ? EvaluateInPlace(Result, Info, *This, E->getSubExpr())
EvaluateInPlace(Result, Info, *This, E->getSubExpr()) + : Evaluate(Result, Info, E->getSubExpr()); } } }; } // end anonymous namespace -static bool EvaluateAtomic(const Expr *E, APValue &Result, EvalInfo &Info) { +static bool EvaluateAtomic(const Expr *E, const LValue *This, APValue &Result, + EvalInfo &Info) { assert(E->isRValue() && E->getType()->isAtomicType()); - return AtomicExprEvaluator(Info, Result).Visit(E); + return AtomicExprEvaluator(Info, This, Result).Visit(E); } //===----------------------------------------------------------------------===// @@ -9348,8 +9836,17 @@ static bool Evaluate(APValue &Result, EvalInfo &Info, const Expr *E) { if (!EvaluateVoid(E, Info)) return false; } else if (T->isAtomicType()) { - if (!EvaluateAtomic(E, Result, Info)) - return false; + QualType Unqual = T.getAtomicUnqualifiedType(); + if (Unqual->isArrayType() || Unqual->isRecordType()) { + LValue LV; + LV.set(E, Info.CurrentCall->Index); + APValue &Value = Info.CurrentCall->createTemporary(E, false); + if (!EvaluateAtomic(E, &LV, Value, Info)) + return false; + } else { + if (!EvaluateAtomic(E, nullptr, Result, Info)) + return false; + } } else if (Info.getLangOpts().CPlusPlus11) { Info.FFDiag(E, diag::note_constexpr_nonliteral) << E->getType(); return false; @@ -9374,10 +9871,16 @@ static bool EvaluateInPlace(APValue &Result, EvalInfo &Info, const LValue &This, if (E->isRValue()) { // Evaluate arrays and record types in-place, so that later initializers can // refer to earlier-initialized members of the object. - if (E->getType()->isArrayType()) + QualType T = E->getType(); + if (T->isArrayType()) return EvaluateArray(E, This, Result, Info); - else if (E->getType()->isRecordType()) + else if (T->isRecordType()) return EvaluateRecord(E, This, Result, Info); + else if (T->isAtomicType()) { + QualType Unqual = T.getAtomicUnqualifiedType(); + if (Unqual->isArrayType() || Unqual->isRecordType()) + return EvaluateAtomic(E, &This, Result, Info); + } } // For any other type, in-place evaluation is unimportant. @@ -9594,7 +10097,7 @@ bool Expr::EvalResult::isGlobalLValue() const { // Note that to reduce code duplication, this helper does no evaluation // itself; the caller checks whether the expression is evaluatable, and // in the rare cases where CheckICE actually cares about the evaluated -// value, it calls into Evalute. +// value, it calls into Evaluate. 
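A minimal C sketch of the rare case described above; this assumes clang's usual treatment of a comma operand as an ICE only while it stays unevaluated:

enum {
  /* (1, 2) is not an ICE on its own, but evaluating the left-hand side of
     && shows it is never reached, so CheckICE can still accept N. */
  N = 0 && (1, 2)
};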
namespace { @@ -9716,6 +10219,7 @@ static ICEDiag CheckICE(const Expr* E, const ASTContext &Ctx) { case Expr::LambdaExprClass: case Expr::CXXFoldExprClass: case Expr::CoawaitExprClass: + case Expr::DependentCoawaitExprClass: case Expr::CoyieldExprClass: return ICEDiag(IK_NotICE, E->getLocStart()); @@ -10097,10 +10601,25 @@ bool Expr::isCXX11ConstantExpr(const ASTContext &Ctx, APValue *Result, bool Expr::EvaluateWithSubstitution(APValue &Value, ASTContext &Ctx, const FunctionDecl *Callee, - ArrayRef Args) const { + ArrayRef Args, + const Expr *This) const { Expr::EvalStatus Status; EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantExpressionUnevaluated); + LValue ThisVal; + const LValue *ThisPtr = nullptr; + if (This) { +#ifndef NDEBUG + auto *MD = dyn_cast(Callee); + assert(MD && "Don't provide `this` for non-methods."); + assert(!MD->isStatic() && "Don't provide `this` for static methods."); +#endif + if (EvaluateObjectArgument(Info, This, ThisVal)) + ThisPtr = &ThisVal; + if (Info.EvalStatus.HasSideEffects) + return false; + } + ArgVector ArgValues(Args.size()); for (ArrayRef::iterator I = Args.begin(), E = Args.end(); I != E; ++I) { @@ -10113,7 +10632,7 @@ bool Expr::EvaluateWithSubstitution(APValue &Value, ASTContext &Ctx, } // Build fake call to Callee. - CallStackFrame Frame(Info, Callee->getLocation(), Callee, /*This*/nullptr, + CallStackFrame Frame(Info, Callee->getLocation(), Callee, ThisPtr, ArgValues.data()); return Evaluate(Value, Info, this) && !Info.EvalStatus.HasSideEffects; } @@ -10190,5 +10709,5 @@ bool Expr::tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx, Expr::EvalStatus Status; EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantFold); - return ::tryEvaluateBuiltinObjectSize(this, Type, Info, Result); + return tryEvaluateBuiltinObjectSize(this, Type, Info, Result); } diff --git a/tools/clang/lib/AST/ItaniumMangle.cpp b/tools/clang/lib/AST/ItaniumMangle.cpp index 7177100..cf5bf6f 100644 --- a/tools/clang/lib/AST/ItaniumMangle.cpp +++ b/tools/clang/lib/AST/ItaniumMangle.cpp @@ -1190,6 +1190,8 @@ void CXXNameMangler::mangleUnresolvedName( llvm_unreachable("Can't mangle a constructor name!"); case DeclarationName::CXXUsingDirective: llvm_unreachable("Can't mangle a using directive name!"); + case DeclarationName::CXXDeductionGuideName: + llvm_unreachable("Can't mangle a deduction guide name!"); case DeclarationName::ObjCMultiArgSelector: case DeclarationName::ObjCOneArgSelector: case DeclarationName::ObjCZeroArgSelector: @@ -1419,6 +1421,9 @@ void CXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND, writeAbiTags(ND, AdditionalAbiTags); break; + case DeclarationName::CXXDeductionGuideName: + llvm_unreachable("Can't mangle a deduction guide name!"); + case DeclarationName::CXXUsingDirective: llvm_unreachable("Can't mangle a using directive name!"); } @@ -1870,6 +1875,7 @@ bool CXXNameMangler::mangleUnresolvedTypeOrSimpleId(QualType Ty, case Type::Paren: case Type::Attributed: case Type::Auto: + case Type::DeducedTemplateSpecialization: case Type::PackExpansion: case Type::ObjCObject: case Type::ObjCInterface: @@ -1996,6 +2002,7 @@ void CXXNameMangler::mangleOperatorName(DeclarationName Name, unsigned Arity) { switch (Name.getNameKind()) { case DeclarationName::CXXConstructorName: case DeclarationName::CXXDestructorName: + case DeclarationName::CXXDeductionGuideName: case DeclarationName::CXXUsingDirective: case DeclarationName::Identifier: case DeclarationName::ObjCMultiArgSelector: @@ -2493,9 +2500,6 @@ void CXXNameMangler::mangleType(const BuiltinType *T) { case 
BuiltinType::OCLQueue: Out << "9ocl_queue"; break; - case BuiltinType::OCLNDRange: - Out << "11ocl_ndrange"; - break; case BuiltinType::OCLReserveID: Out << "13ocl_reserveid"; break; @@ -3049,6 +3053,7 @@ void CXXNameMangler::mangleType(const DependentNameType *T) { // ::= Te # dependent elaborated type specifier using // # 'enum' switch (T->getKeyword()) { + case ETK_None: case ETK_Typename: break; case ETK_Struct: @@ -3062,8 +3067,6 @@ void CXXNameMangler::mangleType(const DependentNameType *T) { case ETK_Enum: Out << "Te"; break; - default: - llvm_unreachable("unexpected keyword for dependent type name"); } // Typename types are always nested Out << 'N'; @@ -3152,6 +3155,16 @@ void CXXNameMangler::mangleType(const AutoType *T) { mangleType(D); } +void CXXNameMangler::mangleType(const DeducedTemplateSpecializationType *T) { + // FIXME: This is not the right mangling. We also need to include a scope + // here in some cases. + QualType D = T->getDeducedType(); + if (D.isNull()) + mangleUnscopedTemplateName(T->getTemplateName(), nullptr); + else + mangleType(D); +} + void CXXNameMangler::mangleType(const AtomicType *T) { // ::= U # vendor extended type qualifier // (Until there's a standardized mangling...) @@ -4027,6 +4040,12 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { mangleExpression(cast(E)->getOperand()); break; + case Expr::DependentCoawaitExprClass: + // FIXME: Propose a non-vendor mangling. + Out << "v18co_await"; + mangleExpression(cast(E)->getOperand()); + break; + case Expr::CoyieldExprClass: // FIXME: Propose a non-vendor mangling. Out << "v18co_yield"; diff --git a/tools/clang/lib/AST/MicrosoftMangle.cpp b/tools/clang/lib/AST/MicrosoftMangle.cpp index 084c2cd..a8631e6 100644 --- a/tools/clang/lib/AST/MicrosoftMangle.cpp +++ b/tools/clang/lib/AST/MicrosoftMangle.cpp @@ -109,13 +109,13 @@ static const DeclContext *getEffectiveParentContext(const DeclContext *DC) { static const FunctionDecl *getStructor(const NamedDecl *ND) { if (const auto *FTD = dyn_cast(ND)) - return FTD->getTemplatedDecl(); + return FTD->getTemplatedDecl()->getCanonicalDecl(); const auto *FD = cast(ND); if (const auto *FTD = FD->getPrimaryTemplate()) - return FTD->getTemplatedDecl(); + return FTD->getTemplatedDecl()->getCanonicalDecl(); - return FD; + return FD->getCanonicalDecl(); } /// MicrosoftMangleContextImpl - Overrides the default MangleContext for the @@ -312,6 +312,10 @@ class MicrosoftCXXNameMangler { void mangleNestedName(const NamedDecl *ND); private: + bool isStructorDecl(const NamedDecl *ND) const { + return ND == Structor || getStructor(ND) == Structor; + } + void mangleUnqualifiedName(const NamedDecl *ND) { mangleUnqualifiedName(ND, ND->getDeclName()); } @@ -863,21 +867,28 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND, } } - llvm::SmallString<64> Name(" Name; if (DeclaratorDecl *DD = Context.getASTContext().getDeclaratorForUnnamedTagDecl(TD)) { // Anonymous types without a name for linkage purposes have their // declarator mangled in if they have one. + Name += "getName(); } else if (TypedefNameDecl *TND = Context.getASTContext().getTypedefNameForUnnamedTagDecl( TD)) { // Anonymous types without a name for linkage purposes have their // associate typedef mangled in if they have one. + Name += "getName(); + } else if (auto *ED = dyn_cast(TD)) { + auto EnumeratorI = ED->enumerator_begin(); + assert(EnumeratorI != ED->enumerator_end()); + Name += "getName(); } else { // Otherwise, number the types using a $S prefix. 
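// Rough examples of the fallbacks above (rendered name fragments assumed,
// since the exact spellings live in the string literals):
//   typedef struct { int x; } T;   // no declarator: the typedef name 'T'
//   enum { kFirst };               // unnamed enum: first enumerator 'kFirst'
//   struct { int x; } s;           // otherwise: numbered with the $S prefix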
- Name += "$S"; + Name += "(StructorType)); @@ -931,6 +942,9 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND, break; } + case DeclarationName::CXXDeductionGuideName: + llvm_unreachable("Can't mangle a deduction guide name!"); + case DeclarationName::CXXUsingDirective: llvm_unreachable("Can't mangle a using directive name!"); } @@ -1786,10 +1800,6 @@ void MicrosoftCXXNameMangler::mangleType(const BuiltinType *T, Qualifiers, Out << "PA"; mangleArtificalTagType(TTK_Struct, "ocl_queue"); break; - case BuiltinType::OCLNDRange: - Out << "PA"; - mangleArtificalTagType(TTK_Struct, "ocl_ndrange"); - break; case BuiltinType::OCLReserveID: Out << "PA"; mangleArtificalTagType(TTK_Struct, "ocl_reserveid"); @@ -1857,7 +1867,7 @@ void MicrosoftCXXNameMangler::mangleFunctionType(const FunctionType *T, IsStructor = true; IsCtorClosure = (StructorType == Ctor_CopyingClosure || StructorType == Ctor_DefaultClosure) && - getStructor(MD) == Structor; + isStructorDecl(MD); if (IsCtorClosure) CC = getASTContext().getDefaultCallingConvention( /*IsVariadic=*/false, /*IsCXXMethod=*/true); @@ -1878,14 +1888,18 @@ void MicrosoftCXXNameMangler::mangleFunctionType(const FunctionType *T, // ::= // ::= @ # structors (they have no declared return type) if (IsStructor) { - if (isa(D) && D == Structor && - StructorType == Dtor_Deleting) { - // The scalar deleting destructor takes an extra int argument. - // However, the FunctionType generated has 0 arguments. - // FIXME: This is a temporary hack. - // Maybe should fix the FunctionType creation instead? - Out << (PointersAre64Bit ? "PEAXI@Z" : "PAXI@Z"); - return; + if (isa(D) && isStructorDecl(D)) { + // The scalar deleting destructor takes an extra int argument which is not + // reflected in the AST. + if (StructorType == Dtor_Deleting) { + Out << (PointersAre64Bit ? "PEAXI@Z" : "PAXI@Z"); + return; + } + // The vbase destructor returns void which is not reflected in the AST. + if (StructorType == Dtor_Complete) { + Out << "XXZ"; + return; + } } if (IsCtorClosure) { // Default constructor closure and copy constructor closure both return @@ -1945,7 +1959,7 @@ void MicrosoftCXXNameMangler::mangleFunctionType(const FunctionType *T, // Happens for function pointer type arguments for example. for (unsigned I = 0, E = Proto->getNumParams(); I != E; ++I) { mangleArgumentType(Proto->getParamType(I), Range); - // Mangle each pass_object_size parameter as if it's a paramater of enum + // Mangle each pass_object_size parameter as if it's a parameter of enum // type passed directly after the parameter with the pass_object_size // attribute. The aforementioned enum's name is __pass_object_size, and we // pretend it resides in a top-level namespace called __clang. @@ -1993,13 +2007,20 @@ void MicrosoftCXXNameMangler::mangleFunctionClass(const FunctionDecl *FD) { // ::= Y # global near // ::= Z # global far if (const CXXMethodDecl *MD = dyn_cast(FD)) { + bool IsVirtual = MD->isVirtual(); + // When mangling vbase destructor variants, ignore whether or not the + // underlying destructor was defined to be virtual. 
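// Summary sketch of the destructor variants special-cased here and above
// (signatures as the mangler emits them, not as declared in source):
//   scalar deleting dtor:       takes an extra int, trailing "PEAXI@Z" (x64)
//                               or "PAXI@Z" (x86)
//   vbase dtor (Dtor_Complete): returns void, trailing "XXZ", and is mangled
//                               as non-virtual below even when the destructor
//                               is declared virtual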
+ if (isa(MD) && isStructorDecl(MD) && + StructorType == Dtor_Complete) { + IsVirtual = false; + } switch (MD->getAccess()) { case AS_none: llvm_unreachable("Unsupported access specifier"); case AS_private: if (MD->isStatic()) Out << 'C'; - else if (MD->isVirtual()) + else if (IsVirtual) Out << 'E'; else Out << 'A'; @@ -2007,7 +2028,7 @@ void MicrosoftCXXNameMangler::mangleFunctionClass(const FunctionDecl *FD) { case AS_protected: if (MD->isStatic()) Out << 'K'; - else if (MD->isVirtual()) + else if (IsVirtual) Out << 'M'; else Out << 'I'; @@ -2015,7 +2036,7 @@ void MicrosoftCXXNameMangler::mangleFunctionClass(const FunctionDecl *FD) { case AS_public: if (MD->isStatic()) Out << 'S'; - else if (MD->isVirtual()) + else if (IsVirtual) Out << 'U'; else Out << 'Q'; @@ -2465,6 +2486,17 @@ void MicrosoftCXXNameMangler::mangleType(const AutoType *T, Qualifiers, << Range; } +void MicrosoftCXXNameMangler::mangleType( + const DeducedTemplateSpecializationType *T, Qualifiers, SourceRange Range) { + assert(T->getDeducedType().isNull() && "expecting a dependent type!"); + + DiagnosticsEngine &Diags = Context.getDiags(); + unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, + "cannot mangle this deduced class template specialization type yet"); + Diags.Report(Range.getBegin(), DiagID) + << Range; +} + void MicrosoftCXXNameMangler::mangleType(const AtomicType *T, Qualifiers, SourceRange Range) { QualType ValueType = T->getValueType(); @@ -2988,14 +3020,14 @@ void MicrosoftMangleContextImpl::mangleStringLiteral(const StringLiteral *SL, // N.B. The length is in terms of bytes, not characters. Mangler.mangleNumber(SL->getByteLength() + SL->getCharByteWidth()); - auto GetLittleEndianByte = [&Mangler, &SL](unsigned Index) { + auto GetLittleEndianByte = [&SL](unsigned Index) { unsigned CharByteWidth = SL->getCharByteWidth(); uint32_t CodeUnit = SL->getCodeUnit(Index / CharByteWidth); unsigned OffsetInCodeUnit = Index % CharByteWidth; return static_cast((CodeUnit >> (8 * OffsetInCodeUnit)) & 0xff); }; - auto GetBigEndianByte = [&Mangler, &SL](unsigned Index) { + auto GetBigEndianByte = [&SL](unsigned Index) { unsigned CharByteWidth = SL->getCharByteWidth(); uint32_t CodeUnit = SL->getCodeUnit(Index / CharByteWidth); unsigned OffsetInCodeUnit = (CharByteWidth - 1) - (Index % CharByteWidth); diff --git a/tools/clang/lib/AST/NSAPI.cpp b/tools/clang/lib/AST/NSAPI.cpp index d4e9fac..79a3c22 100644 --- a/tools/clang/lib/AST/NSAPI.cpp +++ b/tools/clang/lib/AST/NSAPI.cpp @@ -455,7 +455,6 @@ NSAPI::getNSNumberFactoryMethodKind(QualType T) const { case BuiltinType::OCLEvent: case BuiltinType::OCLClkEvent: case BuiltinType::OCLQueue: - case BuiltinType::OCLNDRange: case BuiltinType::OCLReserveID: case BuiltinType::BoundMember: case BuiltinType::Dependent: diff --git a/tools/clang/lib/AST/Type.cpp b/tools/clang/lib/AST/Type.cpp index b56be1e..32330bc 100644 --- a/tools/clang/lib/AST/Type.cpp +++ b/tools/clang/lib/AST/Type.cpp @@ -1559,61 +1559,79 @@ TagDecl *Type::getAsTagDecl() const { } namespace { - class GetContainedAutoVisitor : - public TypeVisitor { + class GetContainedDeducedTypeVisitor : + public TypeVisitor { + bool Syntactic; public: - using TypeVisitor::Visit; - AutoType *Visit(QualType T) { + GetContainedDeducedTypeVisitor(bool Syntactic = false) + : Syntactic(Syntactic) {} + + using TypeVisitor::Visit; + Type *Visit(QualType T) { if (T.isNull()) return nullptr; return Visit(T.getTypePtr()); } - // The 'auto' type itself. 
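// (Context for the visitor below: DeducedType now covers both 'auto' and
// C++17 class template argument deduction. A hypothetical example of the
// latter:
//   template <typename T> struct Box { T v; };
//   Box b{42};   // DeducedTemplateSpecializationType until it deduces
//                // to Box<int>
// Both kinds are found by the same walk.)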
- AutoType *VisitAutoType(const AutoType *AT) { - return const_cast(AT); + // The deduced type itself. + Type *VisitDeducedType(const DeducedType *AT) { + return const_cast(AT); } // Only these types can contain the desired 'auto' type. - AutoType *VisitPointerType(const PointerType *T) { + Type *VisitElaboratedType(const ElaboratedType *T) { + return Visit(T->getNamedType()); + } + Type *VisitPointerType(const PointerType *T) { return Visit(T->getPointeeType()); } - AutoType *VisitBlockPointerType(const BlockPointerType *T) { + Type *VisitBlockPointerType(const BlockPointerType *T) { return Visit(T->getPointeeType()); } - AutoType *VisitReferenceType(const ReferenceType *T) { + Type *VisitReferenceType(const ReferenceType *T) { return Visit(T->getPointeeTypeAsWritten()); } - AutoType *VisitMemberPointerType(const MemberPointerType *T) { + Type *VisitMemberPointerType(const MemberPointerType *T) { return Visit(T->getPointeeType()); } - AutoType *VisitArrayType(const ArrayType *T) { + Type *VisitArrayType(const ArrayType *T) { return Visit(T->getElementType()); } - AutoType *VisitDependentSizedExtVectorType( + Type *VisitDependentSizedExtVectorType( const DependentSizedExtVectorType *T) { return Visit(T->getElementType()); } - AutoType *VisitVectorType(const VectorType *T) { + Type *VisitVectorType(const VectorType *T) { return Visit(T->getElementType()); } - AutoType *VisitFunctionType(const FunctionType *T) { + Type *VisitFunctionProtoType(const FunctionProtoType *T) { + if (Syntactic && T->hasTrailingReturn()) + return const_cast(T); + return VisitFunctionType(T); + } + Type *VisitFunctionType(const FunctionType *T) { return Visit(T->getReturnType()); } - AutoType *VisitParenType(const ParenType *T) { + Type *VisitParenType(const ParenType *T) { return Visit(T->getInnerType()); } - AutoType *VisitAttributedType(const AttributedType *T) { + Type *VisitAttributedType(const AttributedType *T) { return Visit(T->getModifiedType()); } - AutoType *VisitAdjustedType(const AdjustedType *T) { + Type *VisitAdjustedType(const AdjustedType *T) { return Visit(T->getOriginalType()); } }; } -AutoType *Type::getContainedAutoType() const { - return GetContainedAutoVisitor().Visit(this); +DeducedType *Type::getContainedDeducedType() const { + return cast_or_null( + GetContainedDeducedTypeVisitor().Visit(this)); +} + +bool Type::hasAutoForTrailingReturnType() const { + return dyn_cast_or_null( + GetContainedDeducedTypeVisitor(true).Visit(this)); } bool Type::hasIntegerRepresentation() const { @@ -2634,8 +2652,6 @@ StringRef BuiltinType::getName(const PrintingPolicy &Policy) const { return "clk_event_t"; case OCLQueue: return "queue_t"; - case OCLNDRange: - return "ndrange_t"; case OCLReserveID: return "reserve_id_t"; case OMPArraySection: @@ -3369,6 +3385,7 @@ static CachedProperties computeCachedProperties(const Type *T) { return CachedProperties(ExternalLinkage, false); case Type::Auto: + case Type::DeducedTemplateSpecialization: // Give non-deduced 'auto' types external linkage. We should only see them // here in error recovery. return CachedProperties(ExternalLinkage, false); @@ -3476,6 +3493,7 @@ static LinkageInfo computeLinkageInfo(const Type *T) { return LinkageInfo::external(); case Type::Auto: + case Type::DeducedTemplateSpecialization: return LinkageInfo::external(); case Type::Record: @@ -3612,7 +3630,8 @@ bool Type::canHaveNullability() const { // auto is considered dependent when it isn't deduced. 
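// (Re the entry points defined above, a sketch of what they report:
//    auto f();          // getContainedDeducedType() finds the AutoType
//    auto g() -> int;   // hasAutoForTrailingReturnType() is true, since the
//                       // syntactic walk stops at the trailing-return
//                       // function type, yet no deduced type is contained.)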
case Type::Auto: - return !cast(type.getTypePtr())->isDeduced(); + case Type::DeducedTemplateSpecialization: + return !cast(type.getTypePtr())->isDeduced(); case Type::Builtin: switch (cast(type.getTypePtr())->getKind()) { @@ -3644,7 +3663,6 @@ bool Type::canHaveNullability() const { case BuiltinType::OCLEvent: case BuiltinType::OCLClkEvent: case BuiltinType::OCLQueue: - case BuiltinType::OCLNDRange: case BuiltinType::OCLReserveID: case BuiltinType::BuiltinFn: case BuiltinType::NullPtr: diff --git a/tools/clang/lib/AST/TypeLoc.cpp b/tools/clang/lib/AST/TypeLoc.cpp index f9cf490..28c818b 100644 --- a/tools/clang/lib/AST/TypeLoc.cpp +++ b/tools/clang/lib/AST/TypeLoc.cpp @@ -342,7 +342,6 @@ TypeSpecifierType BuiltinTypeLoc::getWrittenTypeSpec() const { case BuiltinType::OCLEvent: case BuiltinType::OCLClkEvent: case BuiltinType::OCLQueue: - case BuiltinType::OCLNDRange: case BuiltinType::OCLReserveID: case BuiltinType::BuiltinFn: case BuiltinType::OMPArraySection: diff --git a/tools/clang/lib/Basic/TargetInfo.cpp b/tools/clang/lib/Basic/TargetInfo.cpp index acf5217..1925ff5 100644 --- a/tools/clang/lib/Basic/TargetInfo.cpp +++ b/tools/clang/lib/Basic/TargetInfo.cpp @@ -411,8 +411,8 @@ bool TargetInfo::isValidGCCRegisterName(StringRef Name) const { return false; } -StringRef -TargetInfo::getNormalizedGCCRegisterName(StringRef Name) const { +StringRef TargetInfo::getNormalizedGCCRegisterName(StringRef Name, + bool ReturnCanonical) const { assert(isValidGCCRegisterName(Name) && "Invalid register passed in"); // Get rid of any register prefix. @@ -437,7 +437,7 @@ TargetInfo::getNormalizedGCCRegisterName(StringRef Name) const { // Make sure the register that the additional name is for is within // the bounds of the register names from above. if (AN == Name && ARN.RegNum < Names.size()) - return Name; + return ReturnCanonical ? Names[ARN.RegNum] : Name; } // Now check aliases. diff --git a/tools/clang/lib/Basic/Targets.cpp b/tools/clang/lib/Basic/Targets.cpp index cd6477e..95898cf 100644 --- a/tools/clang/lib/Basic/Targets.cpp +++ b/tools/clang/lib/Basic/Targets.cpp @@ -512,7 +512,7 @@ class NetBSDTargetInfo : public OSTargetInfo { Builder.defineMacro("__unix__"); Builder.defineMacro("__ELF__"); if (Opts.POSIXThreads) - Builder.defineMacro("_POSIX_THREADS"); + Builder.defineMacro("_REENTRANT"); switch (Triple.getArch()) { default: @@ -545,6 +545,8 @@ class OpenBSDTargetInfo : public OSTargetInfo { Builder.defineMacro("__ELF__"); if (Opts.POSIXThreads) Builder.defineMacro("_REENTRANT"); + if (this->HasFloat128) + Builder.defineMacro("__FLOAT128__"); } public: OpenBSDTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) @@ -552,11 +554,11 @@ class OpenBSDTargetInfo : public OSTargetInfo { this->TLSSupported = false; switch (Triple.getArch()) { - default: case llvm::Triple::x86: case llvm::Triple::x86_64: - case llvm::Triple::arm: - case llvm::Triple::sparc: + this->HasFloat128 = true; + // FALLTHROUGH + default: this->MCountName = "__mcount"; break; case llvm::Triple::mips64: @@ -1232,6 +1234,11 @@ void PPCTargetInfo::getTargetDefines(const LangOptions &Opts, if (LongDoubleWidth == 128) Builder.defineMacro("__LONG_DOUBLE_128__"); + // Define this for elfv2 (64-bit only) or 64-bit darwin. 
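  // Hypothetical downstream use of the macro defined below:
  //   #ifdef __STRUCT_PARM_ALIGN__
  //     /* by-value aggregates may need __STRUCT_PARM_ALIGN__ (16) bytes */
  //   #endif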
+ if (ABI == "elfv2" || + (getTriple().getOS() == llvm::Triple::Darwin && PointerWidth == 64)) + Builder.defineMacro("__STRUCT_PARM_ALIGN__", "16"); + if (Opts.AltiVec) { Builder.defineMacro("__VEC__", "10206"); Builder.defineMacro("__ALTIVEC__"); @@ -1751,30 +1758,57 @@ class NVPTXTargetInfo : public TargetInfo { static const char *const GCCRegNames[]; static const Builtin::Info BuiltinInfo[]; CudaArch GPU; + std::unique_ptr HostTarget; public: - NVPTXTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) + NVPTXTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts, + unsigned TargetPointerWidth) : TargetInfo(Triple) { + assert((TargetPointerWidth == 32 || TargetPointerWidth == 64) && + "NVPTX only supports 32- and 64-bit modes."); + TLSSupported = false; - LongWidth = LongAlign = 64; AddrSpaceMap = &NVPTXAddrSpaceMap; UseAddrSpaceMapMangling = true; + // Define available target features // These must be defined in sorted order! NoAsmVariants = true; GPU = CudaArch::SM_20; + if (TargetPointerWidth == 32) + resetDataLayout("e-p:32:32-i64:64-v16:16-v32:32-n16:32:64"); + else + resetDataLayout("e-i64:64-v16:16-v32:32-n16:32:64"); + // If possible, get a TargetInfo for our host triple, so we can match its // types. llvm::Triple HostTriple(Opts.HostTriple); - if (HostTriple.isNVPTX()) - return; - std::unique_ptr HostTarget( - AllocateTarget(llvm::Triple(Opts.HostTriple), Opts)); + if (!HostTriple.isNVPTX()) + HostTarget.reset(AllocateTarget(llvm::Triple(Opts.HostTriple), Opts)); + + // If no host target, make some guesses about the data layout and return. if (!HostTarget) { + LongWidth = LongAlign = TargetPointerWidth; + PointerWidth = PointerAlign = TargetPointerWidth; + switch (TargetPointerWidth) { + case 32: + SizeType = TargetInfo::UnsignedInt; + PtrDiffType = TargetInfo::SignedInt; + IntPtrType = TargetInfo::SignedInt; + break; + case 64: + SizeType = TargetInfo::UnsignedLong; + PtrDiffType = TargetInfo::SignedLong; + IntPtrType = TargetInfo::SignedLong; + break; + default: + llvm_unreachable("TargetPointerWidth must be 32 or 64"); + } return; } + // Copy properties from host target. PointerWidth = HostTarget->getPointerWidth(/* AddrSpace = */ 0); PointerAlign = HostTarget->getPointerAlign(/* AddrSpace = */ 0); BoolWidth = HostTarget->getBoolWidth(); @@ -1924,16 +1958,26 @@ class NVPTXTargetInfo : public TargetInfo { } void setSupportedOpenCLOpts() override { auto &Opts = getSupportedOpenCLOpts(); - Opts.cl_clang_storage_class_specifiers = 1; - Opts.cl_khr_gl_sharing = 1; - Opts.cl_khr_icd = 1; + Opts.support("cl_clang_storage_class_specifiers"); + Opts.support("cl_khr_gl_sharing"); + Opts.support("cl_khr_icd"); - Opts.cl_khr_fp64 = 1; - Opts.cl_khr_byte_addressable_store = 1; - Opts.cl_khr_global_int32_base_atomics = 1; - Opts.cl_khr_global_int32_extended_atomics = 1; - Opts.cl_khr_local_int32_base_atomics = 1; - Opts.cl_khr_local_int32_extended_atomics = 1; + Opts.support("cl_khr_fp64"); + Opts.support("cl_khr_byte_addressable_store"); + Opts.support("cl_khr_global_int32_base_atomics"); + Opts.support("cl_khr_global_int32_extended_atomics"); + Opts.support("cl_khr_local_int32_base_atomics"); + Opts.support("cl_khr_local_int32_extended_atomics"); + } + + CallingConvCheckResult checkCallingConvention(CallingConv CC) const override { + // CUDA compilations support all of the host's calling conventions. + // + // TODO: We should warn if you apply a non-default CC to anything other than + // a host function. 
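    // Net effect of the TargetPointerWidth parameter (see the deleted
    // NVPTX32TargetInfo/NVPTX64TargetInfo subclasses further down):
    // AllocateTarget now constructs this one class directly, e.g.
    //   new NVPTXTargetInfo(Triple, Opts, /*TargetPointerWidth=*/64)
    // for nvptx64, and the width-dependent defaults above replace the old
    // subclass overrides.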
+ if (HostTarget) + return HostTarget->checkCallingConvention(CC); + return CCCR_Warning; } }; @@ -1953,31 +1997,6 @@ ArrayRef NVPTXTargetInfo::getGCCRegNames() const { return llvm::makeArrayRef(GCCRegNames); } -class NVPTX32TargetInfo : public NVPTXTargetInfo { -public: - NVPTX32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) - : NVPTXTargetInfo(Triple, Opts) { - LongWidth = LongAlign = 32; - PointerWidth = PointerAlign = 32; - SizeType = TargetInfo::UnsignedInt; - PtrDiffType = TargetInfo::SignedInt; - IntPtrType = TargetInfo::SignedInt; - resetDataLayout("e-p:32:32-i64:64-v16:16-v32:32-n16:32:64"); - } -}; - -class NVPTX64TargetInfo : public NVPTXTargetInfo { -public: - NVPTX64TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) - : NVPTXTargetInfo(Triple, Opts) { - PointerWidth = PointerAlign = 64; - SizeType = TargetInfo::UnsignedLong; - PtrDiffType = TargetInfo::SignedLong; - IntPtrType = TargetInfo::SignedLong; - resetDataLayout("e-i64:64-v16:16-v32:32-n16:32:64"); - } -}; - static const unsigned AMDGPUAddrSpaceMap[] = { 1, // opencl_global 3, // opencl_local @@ -2017,7 +2036,8 @@ class AMDGPUTargetInfo final : public TargetInfo { GK_CAYMAN, GK_GFX6, GK_GFX7, - GK_GFX8 + GK_GFX8, + GK_GFX9 } GPU; bool hasFP64:1; @@ -2101,15 +2121,15 @@ class AMDGPUTargetInfo final : public TargetInfo { for (auto &I : TargetOpts.FeaturesAsWritten) { if (I == "+fp32-denormals" || I == "-fp32-denormals") hasFP32Denormals = true; - if (I == "+fp64-denormals" || I == "-fp64-denormals") + if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals") hasFP64Denormals = true; } if (!hasFP32Denormals) TargetOpts.Features.push_back((Twine(hasFullSpeedFP32Denorms && !CGOpts.FlushDenorm ? '+' : '-') + Twine("fp32-denormals")).str()); - // Always do not flush fp64 denorms. + // Always do not flush fp64 or fp16 denorms. 
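    // Hypothetical cc1 spelling of the renamed feature:
    //   -target-feature +fp64-fp16-denormals
    // It covers fp64 and fp16 together, which is why a single flag is
    // appended here when denormal behavior was not configured explicitly.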
if (!hasFP64Denormals && hasFP64) - TargetOpts.Features.push_back("+fp64-denormals"); + TargetOpts.Features.push_back("+fp64-fp16-denormals"); } ArrayRef getTargetBuiltins() const override { @@ -2194,6 +2214,8 @@ class AMDGPUTargetInfo final : public TargetInfo { .Case("gfx803", GK_GFX8) .Case("gfx804", GK_GFX8) .Case("gfx810", GK_GFX8) + .Case("gfx900", GK_GFX9) + .Case("gfx901", GK_GFX9) .Default(GK_NONE); } @@ -2208,27 +2230,27 @@ class AMDGPUTargetInfo final : public TargetInfo { void setSupportedOpenCLOpts() override { auto &Opts = getSupportedOpenCLOpts(); - Opts.cl_clang_storage_class_specifiers = 1; - Opts.cl_khr_icd = 1; + Opts.support("cl_clang_storage_class_specifiers"); + Opts.support("cl_khr_icd"); if (hasFP64) - Opts.cl_khr_fp64 = 1; + Opts.support("cl_khr_fp64"); if (GPU >= GK_EVERGREEN) { - Opts.cl_khr_byte_addressable_store = 1; - Opts.cl_khr_global_int32_base_atomics = 1; - Opts.cl_khr_global_int32_extended_atomics = 1; - Opts.cl_khr_local_int32_base_atomics = 1; - Opts.cl_khr_local_int32_extended_atomics = 1; + Opts.support("cl_khr_byte_addressable_store"); + Opts.support("cl_khr_global_int32_base_atomics"); + Opts.support("cl_khr_global_int32_extended_atomics"); + Opts.support("cl_khr_local_int32_base_atomics"); + Opts.support("cl_khr_local_int32_extended_atomics"); } if (GPU >= GK_GFX6) { - Opts.cl_khr_fp16 = 1; - Opts.cl_khr_int64_base_atomics = 1; - Opts.cl_khr_int64_extended_atomics = 1; - Opts.cl_khr_mipmap_image = 1; - Opts.cl_khr_subgroups = 1; - Opts.cl_khr_3d_image_writes = 1; - Opts.cl_amd_media_ops = 1; - Opts.cl_amd_media_ops2 = 1; + Opts.support("cl_khr_fp16"); + Opts.support("cl_khr_int64_base_atomics"); + Opts.support("cl_khr_int64_extended_atomics"); + Opts.support("cl_khr_mipmap_image"); + Opts.support("cl_khr_subgroups"); + Opts.support("cl_khr_3d_image_writes"); + Opts.support("cl_amd_media_ops"); + Opts.support("cl_amd_media_ops2"); } } @@ -2236,6 +2258,32 @@ class AMDGPUTargetInfo final : public TargetInfo { return LangAS::opencl_constant; } + /// \returns Target specific vtbl ptr address space. + unsigned getVtblPtrAddressSpace() const override { + // \todo: We currently have address spaces defined in AMDGPU Backend. It + // would be nice if we could use it here instead of using bare numbers (same + // applies to getDWARFAddressSpace). + return 2; // constant. + } + + /// \returns If a target requires an address within a target specific address + /// space \p AddressSpace to be converted in order to be used, then return the + /// corresponding target specific DWARF address space. + /// + /// \returns Otherwise return None and no conversion will be emitted in the + /// DWARF. + Optional getDWARFAddressSpace( + unsigned AddressSpace) const override { + switch (AddressSpace) { + case 0: // LLVM Private. + return 1; // DWARF Private. + case 3: // LLVM Local. + return 2; // DWARF Local. 
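    // So a DWARF consumer sees, e.g.:
    //   getDWARFAddressSpace(0) == Optional<unsigned>(1)   (private)
    //   getDWARFAddressSpace(3) == Optional<unsigned>(2)   (local)
    // and None for every other address space, meaning no conversion is
    // emitted in the DWARF.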
+ default: + return None; + } + } + CallingConvCheckResult checkCallingConvention(CallingConv CC) const override { switch (CC) { default: @@ -2333,9 +2381,13 @@ bool AMDGPUTargetInfo::initFeatureMap( case GK_GFX7: break; + case GK_GFX9: + Features["gfx9-insts"] = true; + LLVM_FALLTHROUGH; case GK_GFX8: Features["s-memrealtime"] = true; Features["16-bit-insts"] = true; + Features["dpp"] = true; break; case GK_NONE: @@ -2477,11 +2529,10 @@ class X86TargetInfo : public TargetInfo { bool HasXSAVEC = false; bool HasXSAVES = false; bool HasMWAITX = false; + bool HasCLZERO = false; bool HasPKU = false; bool HasCLFLUSHOPT = false; - bool HasPCOMMIT = false; bool HasCLWB = false; - bool HasUMIP = false; bool HasMOVBE = false; bool HasPREFETCHWT1 = false; @@ -2651,6 +2702,12 @@ class X86TargetInfo : public TargetInfo { CK_BDVER4, //@} + /// \name zen + /// Zen architecture processors. + //@{ + CK_ZNVER1, + //@} + /// This specification is deprecated and will be removed in the future. /// Users should prefer \see CK_K8. // FIXME: Warn on this when the CPU is set to it. @@ -2732,6 +2789,7 @@ class X86TargetInfo : public TargetInfo { .Case("bdver2", CK_BDVER2) .Case("bdver3", CK_BDVER3) .Case("bdver4", CK_BDVER4) + .Case("znver1", CK_ZNVER1) .Case("x86-64", CK_x86_64) .Case("geode", CK_Geode) .Default(CK_Generic); @@ -2789,6 +2847,40 @@ class X86TargetInfo : public TargetInfo { const char *getClobbers() const override { return "~{dirflag},~{fpsr},~{flags}"; } + + StringRef getConstraintRegister(const StringRef &Constraint, + const StringRef &Expression) const override { + StringRef::iterator I, E; + for (I = Constraint.begin(), E = Constraint.end(); I != E; ++I) { + if (isalpha(*I)) + break; + } + if (I == E) + return ""; + switch (*I) { + // For the register constraints, return the matching register name + case 'a': + return "ax"; + case 'b': + return "bx"; + case 'c': + return "cx"; + case 'd': + return "dx"; + case 'S': + return "si"; + case 'D': + return "di"; + // In case the constraint is 'r' we need to return Expression + case 'r': + return Expression; + default: + // Default value if there is no constraint for the register + return ""; + } + return ""; + } + void getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const override; static void setSSELevel(llvm::StringMap &Features, X86SSEEnum Level, @@ -2897,6 +2989,7 @@ class X86TargetInfo : public TargetInfo { case CK_BDVER2: case CK_BDVER3: case CK_BDVER4: + case CK_ZNVER1: case CK_x86_64: return true; } @@ -2932,7 +3025,7 @@ class X86TargetInfo : public TargetInfo { } void setSupportedOpenCLOpts() override { - getSupportedOpenCLOpts().setAll(); + getSupportedOpenCLOpts().supportAll(); } }; @@ -3013,8 +3106,7 @@ bool X86TargetInfo::initFeatureMap( setFeatureEnabledImpl(Features, "avx512ifma", true); setFeatureEnabledImpl(Features, "avx512vbmi", true); setFeatureEnabledImpl(Features, "sha", true); - setFeatureEnabledImpl(Features, "umip", true); - // FALLTHROUGH + LLVM_FALLTHROUGH; case CK_SkylakeServer: setFeatureEnabledImpl(Features, "avx512f", true); setFeatureEnabledImpl(Features, "avx512cd", true); @@ -3022,20 +3114,19 @@ bool X86TargetInfo::initFeatureMap( setFeatureEnabledImpl(Features, "avx512bw", true); setFeatureEnabledImpl(Features, "avx512vl", true); setFeatureEnabledImpl(Features, "pku", true); - setFeatureEnabledImpl(Features, "pcommit", true); setFeatureEnabledImpl(Features, "clwb", true); - // FALLTHROUGH + LLVM_FALLTHROUGH; case CK_SkylakeClient: setFeatureEnabledImpl(Features, "xsavec", true); 
setFeatureEnabledImpl(Features, "xsaves", true); setFeatureEnabledImpl(Features, "mpx", true); setFeatureEnabledImpl(Features, "sgx", true); setFeatureEnabledImpl(Features, "clflushopt", true); - // FALLTHROUGH + LLVM_FALLTHROUGH; case CK_Broadwell: setFeatureEnabledImpl(Features, "rdseed", true); setFeatureEnabledImpl(Features, "adx", true); - // FALLTHROUGH + LLVM_FALLTHROUGH; case CK_Haswell: setFeatureEnabledImpl(Features, "avx2", true); setFeatureEnabledImpl(Features, "lzcnt", true); @@ -3044,22 +3135,22 @@ bool X86TargetInfo::initFeatureMap( setFeatureEnabledImpl(Features, "rtm", true); setFeatureEnabledImpl(Features, "fma", true); setFeatureEnabledImpl(Features, "movbe", true); - // FALLTHROUGH + LLVM_FALLTHROUGH; case CK_IvyBridge: setFeatureEnabledImpl(Features, "rdrnd", true); setFeatureEnabledImpl(Features, "f16c", true); setFeatureEnabledImpl(Features, "fsgsbase", true); - // FALLTHROUGH + LLVM_FALLTHROUGH; case CK_SandyBridge: setFeatureEnabledImpl(Features, "avx", true); setFeatureEnabledImpl(Features, "xsave", true); setFeatureEnabledImpl(Features, "xsaveopt", true); - // FALLTHROUGH + LLVM_FALLTHROUGH; case CK_Westmere: case CK_Silvermont: setFeatureEnabledImpl(Features, "aes", true); setFeatureEnabledImpl(Features, "pclmul", true); - // FALLTHROUGH + LLVM_FALLTHROUGH; case CK_Nehalem: setFeatureEnabledImpl(Features, "sse4.2", true); setFeatureEnabledImpl(Features, "fxsr", true); @@ -3119,7 +3210,7 @@ bool X86TargetInfo::initFeatureMap( setFeatureEnabledImpl(Features, "sse4a", true); setFeatureEnabledImpl(Features, "lzcnt", true); setFeatureEnabledImpl(Features, "popcnt", true); - // FALLTHROUGH + LLVM_FALLTHROUGH; case CK_K8SSE3: case CK_OpteronSSE3: case CK_Athlon64SSE3: @@ -3134,7 +3225,7 @@ bool X86TargetInfo::initFeatureMap( setFeatureEnabledImpl(Features, "bmi", true); setFeatureEnabledImpl(Features, "f16c", true); setFeatureEnabledImpl(Features, "xsaveopt", true); - // FALLTHROUGH + LLVM_FALLTHROUGH; case CK_BTVER1: setFeatureEnabledImpl(Features, "ssse3", true); setFeatureEnabledImpl(Features, "sse4a", true); @@ -3144,21 +3235,49 @@ bool X86TargetInfo::initFeatureMap( setFeatureEnabledImpl(Features, "cx16", true); setFeatureEnabledImpl(Features, "fxsr", true); break; + case CK_ZNVER1: + setFeatureEnabledImpl(Features, "adx", true); + setFeatureEnabledImpl(Features, "aes", true); + setFeatureEnabledImpl(Features, "avx2", true); + setFeatureEnabledImpl(Features, "bmi", true); + setFeatureEnabledImpl(Features, "bmi2", true); + setFeatureEnabledImpl(Features, "clflushopt", true); + setFeatureEnabledImpl(Features, "clzero", true); + setFeatureEnabledImpl(Features, "cx16", true); + setFeatureEnabledImpl(Features, "f16c", true); + setFeatureEnabledImpl(Features, "fma", true); + setFeatureEnabledImpl(Features, "fsgsbase", true); + setFeatureEnabledImpl(Features, "fxsr", true); + setFeatureEnabledImpl(Features, "lzcnt", true); + setFeatureEnabledImpl(Features, "mwaitx", true); + setFeatureEnabledImpl(Features, "movbe", true); + setFeatureEnabledImpl(Features, "pclmul", true); + setFeatureEnabledImpl(Features, "popcnt", true); + setFeatureEnabledImpl(Features, "prfchw", true); + setFeatureEnabledImpl(Features, "rdrnd", true); + setFeatureEnabledImpl(Features, "rdseed", true); + setFeatureEnabledImpl(Features, "sha", true); + setFeatureEnabledImpl(Features, "sse4a", true); + setFeatureEnabledImpl(Features, "xsave", true); + setFeatureEnabledImpl(Features, "xsavec", true); + setFeatureEnabledImpl(Features, "xsaveopt", true); + setFeatureEnabledImpl(Features, "xsaves", true); + 
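    // (The feature list above mirrors LLVM's znver1 definition; a
    // hypothetical `clang -march=znver1` therefore implies SHA, CLZERO,
    // MWAITX, and friends, with the corresponding CPU macros emitted by
    // defineCPUMacros() below.)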
break; case CK_BDVER4: setFeatureEnabledImpl(Features, "avx2", true); setFeatureEnabledImpl(Features, "bmi2", true); setFeatureEnabledImpl(Features, "mwaitx", true); - // FALLTHROUGH + LLVM_FALLTHROUGH; case CK_BDVER3: setFeatureEnabledImpl(Features, "fsgsbase", true); setFeatureEnabledImpl(Features, "xsaveopt", true); - // FALLTHROUGH + LLVM_FALLTHROUGH; case CK_BDVER2: setFeatureEnabledImpl(Features, "bmi", true); setFeatureEnabledImpl(Features, "fma", true); setFeatureEnabledImpl(Features, "f16c", true); setFeatureEnabledImpl(Features, "tbm", true); - // FALLTHROUGH + LLVM_FALLTHROUGH; case CK_BDVER1: // xop implies avx, sse4a and fma4. setFeatureEnabledImpl(Features, "xop", true); @@ -3479,14 +3598,12 @@ bool X86TargetInfo::handleTargetFeatures(std::vector &Features, HasPKU = true; } else if (Feature == "+clflushopt") { HasCLFLUSHOPT = true; - } else if (Feature == "+pcommit") { - HasPCOMMIT = true; } else if (Feature == "+clwb") { HasCLWB = true; - } else if (Feature == "+umip") { - HasUMIP = true; } else if (Feature == "+prefetchwt1") { HasPREFETCHWT1 = true; + } else if (Feature == "+clzero") { + HasCLZERO = true; } X86SSEEnum Level = llvm::StringSwitch(Feature) @@ -3695,6 +3812,9 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, case CK_BDVER4: defineCPUMacros(Builder, "bdver4"); break; + case CK_ZNVER1: + defineCPUMacros(Builder, "znver1"); + break; case CK_Geode: defineCPUMacros(Builder, "geode"); break; @@ -3801,6 +3921,18 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__PKU__"); if (HasCX16) Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16"); + if (HasCLFLUSHOPT) + Builder.defineMacro("__CLFLUSHOPT__"); + if (HasCLWB) + Builder.defineMacro("__CLWB__"); + if (HasMPX) + Builder.defineMacro("__MPX__"); + if (HasSGX) + Builder.defineMacro("__SGX__"); + if (HasPREFETCHWT1) + Builder.defineMacro("__PREFETCHWT1__"); + if (HasCLZERO) + Builder.defineMacro("__CLZERO__"); // Each case falls through to the previous one here. 
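  // e.g. a hypothetical `clang -march=znver1 -dM -E /dev/null` should now
  // list __CLZERO__ and __CLFLUSHOPT__ among the predefined macros, since
  // znver1 enables both features in initFeatureMap() above.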
switch (SSELevel) { @@ -3887,6 +4019,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const { .Case("bmi2", HasBMI2) .Case("clflushopt", HasCLFLUSHOPT) .Case("clwb", HasCLWB) + .Case("clzero", HasCLZERO) .Case("cx16", HasCX16) .Case("f16c", HasF16C) .Case("fma", HasFMA) @@ -3900,7 +4033,6 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const { .Case("movbe", HasMOVBE) .Case("mpx", HasMPX) .Case("pclmul", HasPCLMUL) - .Case("pcommit", HasPCOMMIT) .Case("pku", HasPKU) .Case("popcnt", HasPOPCNT) .Case("prefetchwt1", HasPREFETCHWT1) @@ -3918,7 +4050,6 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const { .Case("sse4.2", SSELevel >= SSE42) .Case("sse4a", XOPLevel >= SSE4A) .Case("tbm", HasTBM) - .Case("umip", HasUMIP) .Case("x86", true) .Case("x86_32", getTriple().getArch() == llvm::Triple::x86) .Case("x86_64", getTriple().getArch() == llvm::Triple::x86_64) @@ -4972,6 +5103,8 @@ class ARMTargetInfo : public TargetInfo { return "7M"; case llvm::ARM::AK_ARMV7EM: return "7EM"; + case llvm::ARM::AK_ARMV7VE: + return "7VE"; case llvm::ARM::AK_ARMV8A: return "8A"; case llvm::ARM::AK_ARMV8_1A: @@ -5060,6 +5193,8 @@ class ARMTargetInfo : public TargetInfo { default: if (Triple.getOS() == llvm::Triple::NetBSD) setABI("apcs-gnu"); + else if (Triple.getOS() == llvm::Triple::OpenBSD) + setABI("aapcs-linux"); else setABI("aapcs"); break; @@ -5391,6 +5526,8 @@ class ARMTargetInfo : public TargetInfo { Builder.defineMacro("__ARM_VFPV3__"); if (FPU & VFP4FPU) Builder.defineMacro("__ARM_VFPV4__"); + if (FPU & FPARMV8) + Builder.defineMacro("__ARM_FPV5__"); } // This only gets set when Neon instructions are actually available, unlike @@ -5839,7 +5976,8 @@ class AArch64TargetInfo : public TargetInfo { public: AArch64TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : TargetInfo(Triple), ABI("aapcs") { - if (getTriple().getOS() == llvm::Triple::NetBSD) { + if (getTriple().getOS() == llvm::Triple::NetBSD || + getTriple().getOS() == llvm::Triple::OpenBSD) { WCharType = SignedInt; // NetBSD apparently prefers consistency across ARM targets to consistency @@ -5874,8 +6012,9 @@ class AArch64TargetInfo : public TargetInfo { // AArch64 targets default to using the ARM C++ ABI. TheCXXABI.set(TargetCXXABI::GenericAArch64); - if (Triple.getOS() == llvm::Triple::Linux || - Triple.getOS() == llvm::Triple::UnknownOS) + if (Triple.getOS() == llvm::Triple::Linux) + this->MCountName = "\01_mcount"; + else if (Triple.getOS() == llvm::Triple::UnknownOS) this->MCountName = Opts.EABIVersion == "gnu" ? 
"\01_mcount" : "mcount"; } @@ -6951,6 +7090,15 @@ class SystemZTargetInfo : public TargetInfo { Builder.defineMacro("__zarch__"); Builder.defineMacro("__LONG_DOUBLE_128__"); + const std::string ISARev = llvm::StringSwitch(CPU) + .Cases("arch8", "z10", "8") + .Cases("arch9", "z196", "9") + .Cases("arch10", "zEC12", "10") + .Cases("arch11", "z13", "11") + .Default(""); + if (!ISARev.empty()) + Builder.defineMacro("__ARCH__", ISARev); + Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1"); Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2"); Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4"); @@ -6958,6 +7106,8 @@ class SystemZTargetInfo : public TargetInfo { if (HasTransactionalExecution) Builder.defineMacro("__HTM__"); + if (HasVector) + Builder.defineMacro("__VX__"); if (Opts.ZVector) Builder.defineMacro("__VEC__", "10301"); } @@ -7348,6 +7498,8 @@ class MipsTargetInfo : public TargetInfo { bool IsMicromips; bool IsNan2008; bool IsSingleFloat; + bool IsNoABICalls; + bool CanUseBSDABICalls; enum MipsFloatABI { HardFloat, SoftFloat } FloatABI; @@ -7363,8 +7515,9 @@ class MipsTargetInfo : public TargetInfo { public: MipsTargetInfo(const llvm::Triple &Triple, const TargetOptions &) : TargetInfo(Triple), IsMips16(false), IsMicromips(false), - IsNan2008(false), IsSingleFloat(false), FloatABI(HardFloat), - DspRev(NoDSP), HasMSA(false), HasFP64(false) { + IsNan2008(false), IsSingleFloat(false), IsNoABICalls(false), + CanUseBSDABICalls(false), FloatABI(HardFloat), DspRev(NoDSP), + HasMSA(false), HasFP64(false) { TheCXXABI.set(TargetCXXABI::GenericMIPS); setABI((getTriple().getArch() == llvm::Triple::mips || @@ -7373,6 +7526,9 @@ class MipsTargetInfo : public TargetInfo { : "n64"); CPU = ABI == "o32" ? "mips32r2" : "mips64r2"; + + CanUseBSDABICalls = Triple.getOS() == llvm::Triple::FreeBSD || + Triple.getOS() == llvm::Triple::OpenBSD; } bool isNaN2008Default() const { @@ -7449,7 +7605,11 @@ class MipsTargetInfo : public TargetInfo { void setN64ABITypes() { setN32N64ABITypes(); - Int64Type = SignedLong; + if (getTriple().getOS() == llvm::Triple::OpenBSD) { + Int64Type = SignedLongLong; + } else { + Int64Type = SignedLong; + } IntMaxType = Int64Type; LongWidth = LongAlign = 64; PointerWidth = PointerAlign = 64; @@ -7553,6 +7713,12 @@ class MipsTargetInfo : public TargetInfo { } else llvm_unreachable("Invalid ABI."); + if (!IsNoABICalls) { + Builder.defineMacro("__mips_abicalls"); + if (CanUseBSDABICalls) + Builder.defineMacro("__ABICALLS__"); + } + Builder.defineMacro("__REGISTER_PREFIX__", ""); switch (FloatABI) { @@ -7767,6 +7933,8 @@ class MipsTargetInfo : public TargetInfo { IsNan2008 = true; else if (Feature == "-nan2008") IsNan2008 = false; + else if (Feature == "+noabicalls") + IsNoABICalls = true; } setDataLayout(); @@ -8188,7 +8356,7 @@ class SPIRTargetInfo : public TargetInfo { void setSupportedOpenCLOpts() override { // Assume all OpenCL extensions and optional core features are supported // for SPIR since it is a generic target. - getSupportedOpenCLOpts().setAll(); + getSupportedOpenCLOpts().supportAll(); } }; @@ -8446,6 +8614,451 @@ class RenderScript64TargetInfo : public AArch64leTargetInfo { } }; +/// Information about a specific microcontroller. +struct MCUInfo { + const char *Name; + const char *DefineName; +}; + +// This list should be kept up-to-date with AVRDevices.td in LLVM. 
+static ArrayRef AVRMcus = { + { "at90s1200", "__AVR_AT90S1200__" }, + { "attiny11", "__AVR_ATtiny11__" }, + { "attiny12", "__AVR_ATtiny12__" }, + { "attiny15", "__AVR_ATtiny15__" }, + { "attiny28", "__AVR_ATtiny28__" }, + { "at90s2313", "__AVR_AT90S2313__" }, + { "at90s2323", "__AVR_AT90S2323__" }, + { "at90s2333", "__AVR_AT90S2333__" }, + { "at90s2343", "__AVR_AT90S2343__" }, + { "attiny22", "__AVR_ATtiny22__" }, + { "attiny26", "__AVR_ATtiny26__" }, + { "at86rf401", "__AVR_AT86RF401__" }, + { "at90s4414", "__AVR_AT90S4414__" }, + { "at90s4433", "__AVR_AT90S4433__" }, + { "at90s4434", "__AVR_AT90S4434__" }, + { "at90s8515", "__AVR_AT90S8515__" }, + { "at90c8534", "__AVR_AT90c8534__" }, + { "at90s8535", "__AVR_AT90S8535__" }, + { "ata5272", "__AVR_ATA5272__" }, + { "attiny13", "__AVR_ATtiny13__" }, + { "attiny13a", "__AVR_ATtiny13A__" }, + { "attiny2313", "__AVR_ATtiny2313__" }, + { "attiny2313a", "__AVR_ATtiny2313A__" }, + { "attiny24", "__AVR_ATtiny24__" }, + { "attiny24a", "__AVR_ATtiny24A__" }, + { "attiny4313", "__AVR_ATtiny4313__" }, + { "attiny44", "__AVR_ATtiny44__" }, + { "attiny44a", "__AVR_ATtiny44A__" }, + { "attiny84", "__AVR_ATtiny84__" }, + { "attiny84a", "__AVR_ATtiny84A__" }, + { "attiny25", "__AVR_ATtiny25__" }, + { "attiny45", "__AVR_ATtiny45__" }, + { "attiny85", "__AVR_ATtiny85__" }, + { "attiny261", "__AVR_ATtiny261__" }, + { "attiny261a", "__AVR_ATtiny261A__" }, + { "attiny461", "__AVR_ATtiny461__" }, + { "attiny461a", "__AVR_ATtiny461A__" }, + { "attiny861", "__AVR_ATtiny861__" }, + { "attiny861a", "__AVR_ATtiny861A__" }, + { "attiny87", "__AVR_ATtiny87__" }, + { "attiny43u", "__AVR_ATtiny43U__" }, + { "attiny48", "__AVR_ATtiny48__" }, + { "attiny88", "__AVR_ATtiny88__" }, + { "attiny828", "__AVR_ATtiny828__" }, + { "at43usb355", "__AVR_AT43USB355__" }, + { "at76c711", "__AVR_AT76C711__" }, + { "atmega103", "__AVR_ATmega103__" }, + { "at43usb320", "__AVR_AT43USB320__" }, + { "attiny167", "__AVR_ATtiny167__" }, + { "at90usb82", "__AVR_AT90USB82__" }, + { "at90usb162", "__AVR_AT90USB162__" }, + { "ata5505", "__AVR_ATA5505__" }, + { "atmega8u2", "__AVR_ATmega8U2__" }, + { "atmega16u2", "__AVR_ATmega16U2__" }, + { "atmega32u2", "__AVR_ATmega32U2__" }, + { "attiny1634", "__AVR_ATtiny1634__" }, + { "atmega8", "__AVR_ATmega8__" }, + { "ata6289", "__AVR_ATA6289__" }, + { "atmega8a", "__AVR_ATmega8A__" }, + { "ata6285", "__AVR_ATA6285__" }, + { "ata6286", "__AVR_ATA6286__" }, + { "atmega48", "__AVR_ATmega48__" }, + { "atmega48a", "__AVR_ATmega48A__" }, + { "atmega48pa", "__AVR_ATmega48PA__" }, + { "atmega48p", "__AVR_ATmega48P__" }, + { "atmega88", "__AVR_ATmega88__" }, + { "atmega88a", "__AVR_ATmega88A__" }, + { "atmega88p", "__AVR_ATmega88P__" }, + { "atmega88pa", "__AVR_ATmega88PA__" }, + { "atmega8515", "__AVR_ATmega8515__" }, + { "atmega8535", "__AVR_ATmega8535__" }, + { "atmega8hva", "__AVR_ATmega8HVA__" }, + { "at90pwm1", "__AVR_AT90PWM1__" }, + { "at90pwm2", "__AVR_AT90PWM2__" }, + { "at90pwm2b", "__AVR_AT90PWM2B__" }, + { "at90pwm3", "__AVR_AT90PWM3__" }, + { "at90pwm3b", "__AVR_AT90PWM3B__" }, + { "at90pwm81", "__AVR_AT90PWM81__" }, + { "ata5790", "__AVR_ATA5790__" }, + { "ata5795", "__AVR_ATA5795__" }, + { "atmega16", "__AVR_ATmega16__" }, + { "atmega16a", "__AVR_ATmega16A__" }, + { "atmega161", "__AVR_ATmega161__" }, + { "atmega162", "__AVR_ATmega162__" }, + { "atmega163", "__AVR_ATmega163__" }, + { "atmega164a", "__AVR_ATmega164A__" }, + { "atmega164p", "__AVR_ATmega164P__" }, + { "atmega164pa", "__AVR_ATmega164PA__" }, + { "atmega165", "__AVR_ATmega165__" }, + 
{ "atmega165a", "__AVR_ATmega165A__" }, + { "atmega165p", "__AVR_ATmega165P__" }, + { "atmega165pa", "__AVR_ATmega165PA__" }, + { "atmega168", "__AVR_ATmega168__" }, + { "atmega168a", "__AVR_ATmega168A__" }, + { "atmega168p", "__AVR_ATmega168P__" }, + { "atmega168pa", "__AVR_ATmega168PA__" }, + { "atmega169", "__AVR_ATmega169__" }, + { "atmega169a", "__AVR_ATmega169A__" }, + { "atmega169p", "__AVR_ATmega169P__" }, + { "atmega169pa", "__AVR_ATmega169PA__" }, + { "atmega32", "__AVR_ATmega32__" }, + { "atmega32a", "__AVR_ATmega32A__" }, + { "atmega323", "__AVR_ATmega323__" }, + { "atmega324a", "__AVR_ATmega324A__" }, + { "atmega324p", "__AVR_ATmega324P__" }, + { "atmega324pa", "__AVR_ATmega324PA__" }, + { "atmega325", "__AVR_ATmega325__" }, + { "atmega325a", "__AVR_ATmega325A__" }, + { "atmega325p", "__AVR_ATmega325P__" }, + { "atmega325pa", "__AVR_ATmega325PA__" }, + { "atmega3250", "__AVR_ATmega3250__" }, + { "atmega3250a", "__AVR_ATmega3250A__" }, + { "atmega3250p", "__AVR_ATmega3250P__" }, + { "atmega3250pa", "__AVR_ATmega3250PA__" }, + { "atmega328", "__AVR_ATmega328__" }, + { "atmega328p", "__AVR_ATmega328P__" }, + { "atmega329", "__AVR_ATmega329__" }, + { "atmega329a", "__AVR_ATmega329A__" }, + { "atmega329p", "__AVR_ATmega329P__" }, + { "atmega329pa", "__AVR_ATmega329PA__" }, + { "atmega3290", "__AVR_ATmega3290__" }, + { "atmega3290a", "__AVR_ATmega3290A__" }, + { "atmega3290p", "__AVR_ATmega3290P__" }, + { "atmega3290pa", "__AVR_ATmega3290PA__" }, + { "atmega406", "__AVR_ATmega406__" }, + { "atmega64", "__AVR_ATmega64__" }, + { "atmega64a", "__AVR_ATmega64A__" }, + { "atmega640", "__AVR_ATmega640__" }, + { "atmega644", "__AVR_ATmega644__" }, + { "atmega644a", "__AVR_ATmega644A__" }, + { "atmega644p", "__AVR_ATmega644P__" }, + { "atmega644pa", "__AVR_ATmega644PA__" }, + { "atmega645", "__AVR_ATmega645__" }, + { "atmega645a", "__AVR_ATmega645A__" }, + { "atmega645p", "__AVR_ATmega645P__" }, + { "atmega649", "__AVR_ATmega649__" }, + { "atmega649a", "__AVR_ATmega649A__" }, + { "atmega649p", "__AVR_ATmega649P__" }, + { "atmega6450", "__AVR_ATmega6450__" }, + { "atmega6450a", "__AVR_ATmega6450A__" }, + { "atmega6450p", "__AVR_ATmega6450P__" }, + { "atmega6490", "__AVR_ATmega6490__" }, + { "atmega6490a", "__AVR_ATmega6490A__" }, + { "atmega6490p", "__AVR_ATmega6490P__" }, + { "atmega64rfr2", "__AVR_ATmega64RFR2__" }, + { "atmega644rfr2", "__AVR_ATmega644RFR2__" }, + { "atmega16hva", "__AVR_ATmega16HVA__" }, + { "atmega16hva2", "__AVR_ATmega16HVA2__" }, + { "atmega16hvb", "__AVR_ATmega16HVB__" }, + { "atmega16hvbrevb", "__AVR_ATmega16HVBREVB__" }, + { "atmega32hvb", "__AVR_ATmega32HVB__" }, + { "atmega32hvbrevb", "__AVR_ATmega32HVBREVB__" }, + { "atmega64hve", "__AVR_ATmega64HVE__" }, + { "at90can32", "__AVR_AT90CAN32__" }, + { "at90can64", "__AVR_AT90CAN64__" }, + { "at90pwm161", "__AVR_AT90PWM161__" }, + { "at90pwm216", "__AVR_AT90PWM216__" }, + { "at90pwm316", "__AVR_AT90PWM316__" }, + { "atmega32c1", "__AVR_ATmega32C1__" }, + { "atmega64c1", "__AVR_ATmega64C1__" }, + { "atmega16m1", "__AVR_ATmega16M1__" }, + { "atmega32m1", "__AVR_ATmega32M1__" }, + { "atmega64m1", "__AVR_ATmega64M1__" }, + { "atmega16u4", "__AVR_ATmega16U4__" }, + { "atmega32u4", "__AVR_ATmega32U4__" }, + { "atmega32u6", "__AVR_ATmega32U6__" }, + { "at90usb646", "__AVR_AT90USB646__" }, + { "at90usb647", "__AVR_AT90USB647__" }, + { "at90scr100", "__AVR_AT90SCR100__" }, + { "at94k", "__AVR_AT94K__" }, + { "m3000", "__AVR_AT000__" }, + { "atmega128", "__AVR_ATmega128__" }, + { "atmega128a", "__AVR_ATmega128A__" }, + { 
"atmega1280", "__AVR_ATmega1280__" }, + { "atmega1281", "__AVR_ATmega1281__" }, + { "atmega1284", "__AVR_ATmega1284__" }, + { "atmega1284p", "__AVR_ATmega1284P__" }, + { "atmega128rfa1", "__AVR_ATmega128RFA1__" }, + { "atmega128rfr2", "__AVR_ATmega128RFR2__" }, + { "atmega1284rfr2", "__AVR_ATmega1284RFR2__" }, + { "at90can128", "__AVR_AT90CAN128__" }, + { "at90usb1286", "__AVR_AT90USB1286__" }, + { "at90usb1287", "__AVR_AT90USB1287__" }, + { "atmega2560", "__AVR_ATmega2560__" }, + { "atmega2561", "__AVR_ATmega2561__" }, + { "atmega256rfr2", "__AVR_ATmega256RFR2__" }, + { "atmega2564rfr2", "__AVR_ATmega2564RFR2__" }, + { "atxmega16a4", "__AVR_ATxmega16A4__" }, + { "atxmega16a4u", "__AVR_ATxmega16a4U__" }, + { "atxmega16c4", "__AVR_ATxmega16C4__" }, + { "atxmega16d4", "__AVR_ATxmega16D4__" }, + { "atxmega32a4", "__AVR_ATxmega32A4__" }, + { "atxmega32a4u", "__AVR_ATxmega32A4U__" }, + { "atxmega32c4", "__AVR_ATxmega32C4__" }, + { "atxmega32d4", "__AVR_ATxmega32D4__" }, + { "atxmega32e5", "__AVR_ATxmega32E5__" }, + { "atxmega16e5", "__AVR_ATxmega16E5__" }, + { "atxmega8e5", "__AVR_ATxmega8E5__" }, + { "atxmega32x1", "__AVR_ATxmega32X1__" }, + { "atxmega64a3", "__AVR_ATxmega64A3__" }, + { "atxmega64a3u", "__AVR_ATxmega64A3U__" }, + { "atxmega64a4u", "__AVR_ATxmega64A4U__" }, + { "atxmega64b1", "__AVR_ATxmega64B1__" }, + { "atxmega64b3", "__AVR_ATxmega64B3__" }, + { "atxmega64c3", "__AVR_ATxmega64C3__" }, + { "atxmega64d3", "__AVR_ATxmega64D3__" }, + { "atxmega64d4", "__AVR_ATxmega64D4__" }, + { "atxmega64a1", "__AVR_ATxmega64A1__" }, + { "atxmega64a1u", "__AVR_ATxmega64A1U__" }, + { "atxmega128a3", "__AVR_ATxmega128A3__" }, + { "atxmega128a3u", "__AVR_ATxmega128A3U__" }, + { "atxmega128b1", "__AVR_ATxmega128B1__" }, + { "atxmega128b3", "__AVR_ATxmega128B3__" }, + { "atxmega128c3", "__AVR_ATxmega128C3__" }, + { "atxmega128d3", "__AVR_ATxmega128D3__" }, + { "atxmega128d4", "__AVR_ATxmega128D4__" }, + { "atxmega192a3", "__AVR_ATxmega192A3__" }, + { "atxmega192a3u", "__AVR_ATxmega192A3U__" }, + { "atxmega192c3", "__AVR_ATxmega192C3__" }, + { "atxmega192d3", "__AVR_ATxmega192D3__" }, + { "atxmega256a3", "__AVR_ATxmega256A3__" }, + { "atxmega256a3u", "__AVR_ATxmega256A3U__" }, + { "atxmega256a3b", "__AVR_ATxmega256A3B__" }, + { "atxmega256a3bu", "__AVR_ATxmega256A3BU__" }, + { "atxmega256c3", "__AVR_ATxmega256C3__" }, + { "atxmega256d3", "__AVR_ATxmega256D3__" }, + { "atxmega384c3", "__AVR_ATxmega384C3__" }, + { "atxmega384d3", "__AVR_ATxmega384D3__" }, + { "atxmega128a1", "__AVR_ATxmega128A1__" }, + { "atxmega128a1u", "__AVR_ATxmega128A1U__" }, + { "atxmega128a4u", "__AVR_ATxmega128a4U__" }, + { "attiny4", "__AVR_ATtiny4__" }, + { "attiny5", "__AVR_ATtiny5__" }, + { "attiny9", "__AVR_ATtiny9__" }, + { "attiny10", "__AVR_ATtiny10__" }, + { "attiny20", "__AVR_ATtiny20__" }, + { "attiny40", "__AVR_ATtiny40__" }, + { "attiny102", "__AVR_ATtiny102__" }, + { "attiny104", "__AVR_ATtiny104__" }, +}; + +// AVR Target +class AVRTargetInfo : public TargetInfo { +public: + AVRTargetInfo(const llvm::Triple &Triple, const TargetOptions &) + : TargetInfo(Triple) { + TLSSupported = false; + PointerWidth = 16; + PointerAlign = 8; + IntWidth = 16; + IntAlign = 8; + LongWidth = 32; + LongAlign = 8; + LongLongWidth = 64; + LongLongAlign = 8; + SuitableAlign = 8; + DefaultAlignForAttributeAligned = 8; + HalfWidth = 16; + HalfAlign = 8; + FloatWidth = 32; + FloatAlign = 8; + DoubleWidth = 32; + DoubleAlign = 8; + DoubleFormat = &llvm::APFloat::IEEEsingle(); + LongDoubleWidth = 32; + LongDoubleAlign = 8; + LongDoubleFormat 
= &llvm::APFloat::IEEEsingle(); + SizeType = UnsignedInt; + PtrDiffType = SignedInt; + IntPtrType = SignedInt; + Char16Type = UnsignedInt; + WCharType = SignedInt; + WIntType = SignedInt; + Char32Type = UnsignedLong; + SigAtomicType = SignedChar; + resetDataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:32:32-i64:64:64" + "-f32:32:32-f64:64:64-n8"); + } + + void getTargetDefines(const LangOptions &Opts, + MacroBuilder &Builder) const override { + Builder.defineMacro("AVR"); + Builder.defineMacro("__AVR"); + Builder.defineMacro("__AVR__"); + + if (!this->CPU.empty()) { + auto It = std::find_if(AVRMcus.begin(), AVRMcus.end(), + [&](const MCUInfo &Info) { return Info.Name == this->CPU; }); + + if (It != AVRMcus.end()) + Builder.defineMacro(It->DefineName); + } + } + + ArrayRef getTargetBuiltins() const override { + return None; + } + + BuiltinVaListKind getBuiltinVaListKind() const override { + return TargetInfo::VoidPtrBuiltinVaList; + } + + const char *getClobbers() const override { + return ""; + } + + ArrayRef getGCCRegNames() const override { + static const char * const GCCRegNames[] = { + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", + "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", + "r24", "r25", "X", "Y", "Z", "SP" + }; + return llvm::makeArrayRef(GCCRegNames); + } + + ArrayRef getGCCRegAliases() const override { + return None; + } + + ArrayRef getGCCAddlRegNames() const override { + static const TargetInfo::AddlRegName AddlRegNames[] = { + { { "r26", "r27"}, 26 }, + { { "r28", "r29"}, 27 }, + { { "r30", "r31"}, 28 }, + { { "SPL", "SPH"}, 29 }, + }; + return llvm::makeArrayRef(AddlRegNames); + } + + bool validateAsmConstraint(const char *&Name, + TargetInfo::ConstraintInfo &Info) const override { + // There aren't any multi-character AVR specific constraints. + if (StringRef(Name).size() > 1) return false; + + switch (*Name) { + default: return false; + case 'a': // Simple upper registers + case 'b': // Base pointer registers pairs + case 'd': // Upper register + case 'l': // Lower registers + case 'e': // Pointer register pairs + case 'q': // Stack pointer register + case 'r': // Any register + case 'w': // Special upper register pairs + case 't': // Temporary register + case 'x': case 'X': // Pointer register pair X + case 'y': case 'Y': // Pointer register pair Y + case 'z': case 'Z': // Pointer register pair Z + Info.setAllowsRegister(); + return true; + case 'I': // 6-bit positive integer constant + Info.setRequiresImmediate(0, 63); + return true; + case 'J': // 6-bit negative integer constant + Info.setRequiresImmediate(-63, 0); + return true; + case 'K': // Integer constant (Range: 2) + Info.setRequiresImmediate(2); + return true; + case 'L': // Integer constant (Range: 0) + Info.setRequiresImmediate(0); + return true; + case 'M': // 8-bit integer constant + Info.setRequiresImmediate(0, 0xff); + return true; + case 'N': // Integer constant (Range: -1) + Info.setRequiresImmediate(-1); + return true; + case 'O': // Integer constant (Range: 8, 16, 24) + Info.setRequiresImmediate({8, 16, 24}); + return true; + case 'P': // Integer constant (Range: 1) + Info.setRequiresImmediate(1); + return true; + case 'R': // Integer constant (Range: -6 to 5) + Info.setRequiresImmediate(-6, 5); + return true; + case 'G': // Floating point constant + case 'Q': // A memory address based on Y or Z pointer with displacement. 
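    // Example use from C of the immediate constraints above (the I/O
    // address is hypothetical):
    //   asm volatile("sbi %0, %1" :: "I"(0x18), "I"(5));
    // 'I' demands a 6-bit positive constant, enforced by the
    // setRequiresImmediate(0, 63) call above.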
+ return true; + } + + return false; + } + + IntType getIntTypeByWidth(unsigned BitWidth, + bool IsSigned) const final { + // AVR prefers int for 16-bit integers. + return BitWidth == 16 ? (IsSigned ? SignedInt : UnsignedInt) + : TargetInfo::getIntTypeByWidth(BitWidth, IsSigned); + } + + IntType getLeastIntTypeByWidth(unsigned BitWidth, + bool IsSigned) const final { + // AVR uses int for int_least16_t and int_fast16_t. + return BitWidth == 16 + ? (IsSigned ? SignedInt : UnsignedInt) + : TargetInfo::getLeastIntTypeByWidth(BitWidth, IsSigned); + } + + bool setCPU(const std::string &Name) override { + bool IsFamily = llvm::StringSwitch(Name) + .Case("avr1", true) + .Case("avr2", true) + .Case("avr25", true) + .Case("avr3", true) + .Case("avr31", true) + .Case("avr35", true) + .Case("avr4", true) + .Case("avr5", true) + .Case("avr51", true) + .Case("avr6", true) + .Case("avrxmega1", true) + .Case("avrxmega2", true) + .Case("avrxmega3", true) + .Case("avrxmega4", true) + .Case("avrxmega5", true) + .Case("avrxmega6", true) + .Case("avrxmega7", true) + .Case("avrtiny", true) + .Default(false); + + if (IsFamily) this->CPU = Name; + + bool IsMCU = std::find_if(AVRMcus.begin(), AVRMcus.end(), + [&](const MCUInfo &Info) { return Info.Name == Name; }) != AVRMcus.end(); + + if (IsMCU) this->CPU = Name; + + return IsFamily || IsMCU; + } + +protected: + std::string CPU; +}; + } // end anonymous namespace //===----------------------------------------------------------------------===// @@ -8484,6 +9097,8 @@ static TargetInfo *AllocateTarget(const llvm::Triple &Triple, return new LinuxTargetInfo(Triple, Opts); case llvm::Triple::NetBSD: return new NetBSDTargetInfo(Triple, Opts); + case llvm::Triple::OpenBSD: + return new OpenBSDTargetInfo(Triple, Opts); default: return new AArch64leTargetInfo(Triple, Opts); } @@ -8514,8 +9129,6 @@ static TargetInfo *AllocateTarget(const llvm::Triple &Triple, return new LinuxTargetInfo(Triple, Opts); case llvm::Triple::FreeBSD: return new FreeBSDTargetInfo(Triple, Opts); - case llvm::Triple::Fuchsia: - return new FuchsiaTargetInfo(Triple, Opts); case llvm::Triple::NetBSD: return new NetBSDTargetInfo(Triple, Opts); case llvm::Triple::OpenBSD: @@ -8552,8 +9165,6 @@ static TargetInfo *AllocateTarget(const llvm::Triple &Triple, return new LinuxTargetInfo(Triple, Opts); case llvm::Triple::FreeBSD: return new FreeBSDTargetInfo(Triple, Opts); - case llvm::Triple::Fuchsia: - return new FuchsiaTargetInfo(Triple, Opts); case llvm::Triple::NetBSD: return new NetBSDTargetInfo(Triple, Opts); case llvm::Triple::OpenBSD: @@ -8568,6 +9179,8 @@ static TargetInfo *AllocateTarget(const llvm::Triple &Triple, return new ARMbeTargetInfo(Triple, Opts); } + case llvm::Triple::avr: + return new AVRTargetInfo(Triple, Opts); case llvm::Triple::bpfeb: case llvm::Triple::bpfel: return new BPFTargetInfo(Triple, Opts); @@ -8693,9 +9306,9 @@ static TargetInfo *AllocateTarget(const llvm::Triple &Triple, } case llvm::Triple::nvptx: - return new NVPTX32TargetInfo(Triple, Opts); + return new NVPTXTargetInfo(Triple, Opts, /*TargetPointerWidth=*/32); case llvm::Triple::nvptx64: - return new NVPTX64TargetInfo(Triple, Opts); + return new NVPTXTargetInfo(Triple, Opts, /*TargetPointerWidth=*/64); case llvm::Triple::amdgcn: case llvm::Triple::r600: @@ -8787,8 +9400,6 @@ static TargetInfo *AllocateTarget(const llvm::Triple &Triple, return new BitrigI386TargetInfo(Triple, Opts); case llvm::Triple::FreeBSD: return new FreeBSDTargetInfo(Triple, Opts); - case llvm::Triple::Fuchsia: - return new FuchsiaTargetInfo(Triple, 
Opts); case llvm::Triple::KFreeBSD: return new KFreeBSDTargetInfo(Triple, Opts); case llvm::Triple::Minix: @@ -8884,11 +9495,19 @@ static TargetInfo *AllocateTarget(const llvm::Triple &Triple, return new SPIR64TargetInfo(Triple, Opts); } case llvm::Triple::wasm32: - if (!(Triple == llvm::Triple("wasm32-unknown-unknown"))) + if (Triple.getSubArch() != llvm::Triple::NoSubArch || + Triple.getVendor() != llvm::Triple::UnknownVendor || + Triple.getOS() != llvm::Triple::UnknownOS || + Triple.getEnvironment() != llvm::Triple::UnknownEnvironment || + !(Triple.isOSBinFormatELF() || Triple.isOSBinFormatWasm())) return nullptr; return new WebAssemblyOSTargetInfo(Triple, Opts); case llvm::Triple::wasm64: - if (!(Triple == llvm::Triple("wasm64-unknown-unknown"))) + if (Triple.getSubArch() != llvm::Triple::NoSubArch || + Triple.getVendor() != llvm::Triple::UnknownVendor || + Triple.getOS() != llvm::Triple::UnknownOS || + Triple.getEnvironment() != llvm::Triple::UnknownEnvironment || + !(Triple.isOSBinFormatELF() || Triple.isOSBinFormatWasm())) return nullptr; return new WebAssemblyOSTargetInfo(Triple, Opts); case llvm::Triple::z80: diff --git a/tools/clang/lib/CodeGen/CGDebugInfo.cpp b/tools/clang/lib/CodeGen/CGDebugInfo.cpp index 2c6ea74..c46399b 100644 --- a/tools/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/tools/clang/lib/CodeGen/CGDebugInfo.cpp @@ -41,6 +41,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/MD5.h" #include "llvm/Support/Path.h" using namespace clang; using namespace clang::CodeGen; @@ -106,8 +107,8 @@ void ApplyDebugLocation::init(SourceLocation TemporaryLocation, // Construct a location that has a valid scope, but no line info. assert(!DI->LexicalBlockStack.empty()); - CGF->Builder.SetCurrentDebugLocation( - llvm::DebugLoc::get(0, 0, DI->LexicalBlockStack.back())); + CGF->Builder.SetCurrentDebugLocation(llvm::DebugLoc::get( + 0, 0, DI->LexicalBlockStack.back(), DI->getInlinedAt())); } ApplyDebugLocation::ApplyDebugLocation(CodeGenFunction &CGF, const Expr *E) @@ -133,6 +134,30 @@ ApplyDebugLocation::~ApplyDebugLocation() { CGF->Builder.SetCurrentDebugLocation(std::move(OriginalLocation)); } +ApplyInlineDebugLocation::ApplyInlineDebugLocation(CodeGenFunction &CGF, + GlobalDecl InlinedFn) + : CGF(&CGF) { + if (!CGF.getDebugInfo()) { + this->CGF = nullptr; + return; + } + auto &DI = *CGF.getDebugInfo(); + SavedLocation = DI.getLocation(); + assert((DI.getInlinedAt() == + CGF.Builder.getCurrentDebugLocation()->getInlinedAt()) && + "CGDebugInfo and IRBuilder are out of sync"); + + DI.EmitInlineFunctionStart(CGF.Builder, InlinedFn); +} + +ApplyInlineDebugLocation::~ApplyInlineDebugLocation() { + if (!CGF) + return; + auto &DI = *CGF->getDebugInfo(); + DI.EmitInlineFunctionEnd(CGF->Builder); + DI.EmitLocation(CGF->Builder, SavedLocation); +} + void CGDebugInfo::setLocation(SourceLocation Loc) { // If the new location isn't valid return. if (Loc.isInvalid()) @@ -248,8 +273,8 @@ StringRef CGDebugInfo::getObjCMethodName(const ObjCMethodDecl *OMD) { << OC->getIdentifier()->getNameStart() << ')'; } } else if (const auto *OCD = dyn_cast(DC)) { - OS << ((const NamedDecl *)OCD)->getIdentifier()->getNameStart() << '(' - << OCD->getIdentifier()->getNameStart() << ')'; + OS << OCD->getClassInterface()->getName() << '(' + << OCD->getName() << ')'; } else if (isa(DC)) { // We can extract the type of the class from the self pointer. 
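A note on the wasm32/wasm64 hunk above: the old check accepted only the literal triple wasm32-unknown-unknown (resp. wasm64), whereas the new component-wise test also admits an explicit ELF or Wasm object format. A minimal standalone model of the new predicate (plain C++; the struct and enum here are illustrative stand-ins for the llvm::Triple accessors, not the real API):

    #include <string>

    enum class ObjFormat { ELF, Wasm, MachO, COFF };

    struct TripleParts {
      bool HasSubArch = false;
      std::string Vendor = "unknown", OS = "unknown", Env = "unknown";
      ObjFormat Format = ObjFormat::Wasm;
    };

    // Every triple component must be "unknown" and the object format must
    // be ELF or Wasm; anything else yields no target (nullptr upstream).
    bool acceptsWasmTriple(const TripleParts &T) {
      return !T.HasSubArch && T.Vendor == "unknown" && T.OS == "unknown" &&
             T.Env == "unknown" &&
             (T.Format == ObjFormat::ELF || T.Format == ObjFormat::Wasm);
    }
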
if (ImplicitParamDecl *SelfDecl = OMD->getSelfDecl()) { @@ -320,11 +345,36 @@ StringRef CGDebugInfo::getClassName(const RecordDecl *RD) { return StringRef(); } +llvm::DIFile::ChecksumKind +CGDebugInfo::computeChecksum(FileID FID, SmallString<32> &Checksum) const { + Checksum.clear(); + + if (!CGM.getCodeGenOpts().EmitCodeView) + return llvm::DIFile::CSK_None; + + SourceManager &SM = CGM.getContext().getSourceManager(); + bool Invalid; + llvm::MemoryBuffer *MemBuffer = SM.getBuffer(FID, &Invalid); + if (Invalid) + return llvm::DIFile::CSK_None; + + llvm::MD5 Hash; + llvm::MD5::MD5Result Result; + + Hash.update(MemBuffer->getBuffer()); + Hash.final(Result); + + Hash.stringifyResult(Result, Checksum); + return llvm::DIFile::CSK_MD5; +} + llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) { if (!Loc.isValid()) // If Location is not valid then use main input file. return DBuilder.createFile(remapDIPath(TheCU->getFilename()), - remapDIPath(TheCU->getDirectory())); + remapDIPath(TheCU->getDirectory()), + TheCU->getFile()->getChecksumKind(), + TheCU->getFile()->getChecksum()); SourceManager &SM = CGM.getContext().getSourceManager(); PresumedLoc PLoc = SM.getPresumedLoc(Loc); @@ -332,7 +382,9 @@ llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) { if (PLoc.isInvalid() || StringRef(PLoc.getFilename()).empty()) // If the location is not valid then use main input file. return DBuilder.createFile(remapDIPath(TheCU->getFilename()), - remapDIPath(TheCU->getDirectory())); + remapDIPath(TheCU->getDirectory()), + TheCU->getFile()->getChecksumKind(), + TheCU->getFile()->getChecksum()); // Cache the results. const char *fname = PLoc.getFilename(); @@ -344,8 +396,13 @@ llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) { return cast(V); } + SmallString<32> Checksum; + llvm::DIFile::ChecksumKind CSKind = + computeChecksum(SM.getFileID(Loc), Checksum); + llvm::DIFile *F = DBuilder.createFile(remapDIPath(PLoc.getFilename()), - remapDIPath(getCurrentDirname())); + remapDIPath(getCurrentDirname()), + CSKind, Checksum); DIFileCache[fname].reset(F); return F; @@ -353,7 +410,9 @@ llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) { llvm::DIFile *CGDebugInfo::getOrCreateMainFile() { return DBuilder.createFile(remapDIPath(TheCU->getFilename()), - remapDIPath(TheCU->getDirectory())); + remapDIPath(TheCU->getDirectory()), + TheCU->getFile()->getChecksumKind(), + TheCU->getFile()->getChecksum()); } std::string CGDebugInfo::remapDIPath(StringRef Path) const { @@ -396,6 +455,8 @@ StringRef CGDebugInfo::getCurrentDirname() { } void CGDebugInfo::CreateCompileUnit() { + SmallString<32> Checksum; + llvm::DIFile::ChecksumKind CSKind = llvm::DIFile::CSK_None; // Should we be asking the SourceManager for the main file name, instead of // accepting it as an argument? This just causes the main file name to @@ -422,6 +483,7 @@ void CGDebugInfo::CreateCompileUnit() { llvm::sys::path::append(MainFileDirSS, MainFileName); MainFileName = MainFileDirSS.str(); } + CSKind = computeChecksum(SM.getMainFileID(), Checksum); } llvm::dwarf::SourceLanguage LangTag; @@ -467,10 +529,12 @@ void CGDebugInfo::CreateCompileUnit() { // FIXME - Eliminate TheCU. 
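Since computeChecksum() above is the first user of llvm/Support/MD5.h in this file, a distilled view of the hashing sequence it performs may help. This is a sketch of the same calls, assuming the in-tree MD5 API of this vintage; error handling is elided:

    #include "llvm/ADT/SmallString.h"
    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/MD5.h"

    // Hash an in-memory buffer and render the digest as the 32-character
    // hex string that getOrCreateFile() stores alongside DIFile::CSK_MD5.
    static void md5Hex(llvm::StringRef Buffer, llvm::SmallString<32> &Out) {
      llvm::MD5 Hash;
      llvm::MD5::MD5Result Result;
      Hash.update(Buffer);
      Hash.final(Result);
      Hash.stringifyResult(Result, Out);
    }
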
TheCU = DBuilder.createCompileUnit( LangTag, DBuilder.createFile(remapDIPath(MainFileName), - remapDIPath(getCurrentDirname())), + remapDIPath(getCurrentDirname()), CSKind, + Checksum), Producer, LO.Optimize, CGM.getCodeGenOpts().DwarfDebugFlags, RuntimeVers, CGM.getCodeGenOpts().SplitDwarfFile, EmissionKind, 0 /* DWOid */, - CGM.getCodeGenOpts().SplitDwarfInlining); + CGM.getCodeGenOpts().SplitDwarfInlining, + CGM.getCodeGenOpts().DebugInfoForProfiling); } llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { @@ -542,8 +606,6 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { return getOrCreateStructPtrType("opencl_clk_event_t", OCLClkEventDITy); case BuiltinType::OCLQueue: return getOrCreateStructPtrType("opencl_queue_t", OCLQueueDITy); - case BuiltinType::OCLNDRange: - return getOrCreateStructPtrType("opencl_ndrange_t", OCLNDRangeDITy); case BuiltinType::OCLReserveID: return getOrCreateStructPtrType("opencl_reserve_id_t", OCLReserveIDDITy); @@ -738,12 +800,6 @@ CGDebugInfo::getOrCreateRecordFwdDecl(const RecordType *Ty, uint64_t Size = 0; uint32_t Align = 0; - const RecordDecl *D = RD->getDefinition(); - if (D && D->isCompleteDefinition()) { - Size = CGM.getContext().getTypeSize(Ty); - Align = getDeclAlignIfRequired(D, CGM.getContext()); - } - // Create the type. SmallString<256> FullName = getUniqueTagTypeName(Ty, CGM, TheCU); llvm::DICompositeType *RetTy = DBuilder.createReplaceableCompositeType( @@ -762,17 +818,19 @@ llvm::DIType *CGDebugInfo::CreatePointerLikeType(llvm::dwarf::Tag Tag, // Bit size, align and offset of the type. // Size is always the size of a pointer. We can't use getTypeSize here // because that does not return the correct value for references. - unsigned AS = CGM.getContext().getTargetAddressSpace(PointeeTy); - uint64_t Size = CGM.getTarget().getPointerWidth(AS); + unsigned AddressSpace = CGM.getContext().getTargetAddressSpace(PointeeTy); + uint64_t Size = CGM.getTarget().getPointerWidth(AddressSpace); auto Align = getTypeAlignIfRequired(Ty, CGM.getContext()); + Optional DWARFAddressSpace = + CGM.getTarget().getDWARFAddressSpace(AddressSpace); if (Tag == llvm::dwarf::DW_TAG_reference_type || Tag == llvm::dwarf::DW_TAG_rvalue_reference_type) return DBuilder.createReferenceType(Tag, getOrCreateType(PointeeTy, Unit), - Size, Align); + Size, Align, DWARFAddressSpace); else return DBuilder.createPointerType(getOrCreateType(PointeeTy, Unit), Size, - Align); + Align, DWARFAddressSpace); } llvm::DIType *CGDebugInfo::getOrCreateStructPtrType(StringRef Name, @@ -1577,8 +1635,13 @@ llvm::DIType *CGDebugInfo::getOrCreateVTablePtrType(llvm::DIFile *Unit) { llvm::DITypeRefArray SElements = DBuilder.getOrCreateTypeArray(STy); llvm::DIType *SubTy = DBuilder.createSubroutineType(SElements); unsigned Size = Context.getTypeSize(Context.VoidPtrTy); + unsigned VtblPtrAddressSpace = CGM.getTarget().getVtblPtrAddressSpace(); + Optional DWARFAddressSpace = + CGM.getTarget().getDWARFAddressSpace(VtblPtrAddressSpace); + llvm::DIType *vtbl_ptr_type = - DBuilder.createPointerType(SubTy, Size, 0, "__vtbl_ptr_type"); + DBuilder.createPointerType(SubTy, Size, 0, DWARFAddressSpace, + "__vtbl_ptr_type"); VTablePtrType = DBuilder.createPointerType(vtbl_ptr_type, Size); return VTablePtrType; } @@ -1617,10 +1680,14 @@ void CGDebugInfo::CollectVTableInfo(const CXXRecordDecl *RD, llvm::DIFile *Unit, unsigned VSlotCount = VFTLayout.vtable_components().size() - CGM.getLangOpts().RTTIData; unsigned VTableWidth = PtrWidth * VSlotCount; + unsigned VtblPtrAddressSpace = 
CGM.getTarget().getVtblPtrAddressSpace(); + Optional DWARFAddressSpace = + CGM.getTarget().getDWARFAddressSpace(VtblPtrAddressSpace); // Create a very wide void* type and insert it directly in the element list. llvm::DIType *VTableType = - DBuilder.createPointerType(nullptr, VTableWidth, 0, "__vtbl_ptr_type"); + DBuilder.createPointerType(nullptr, VTableWidth, 0, DWARFAddressSpace, + "__vtbl_ptr_type"); EltTys.push_back(VTableType); // The vptr is a pointer to this special vtable type. @@ -1683,7 +1750,27 @@ void CGDebugInfo::completeType(const RecordDecl *RD) { completeRequiredType(RD); } +/// Return true if the class or any of its methods are marked dllimport. +static bool isClassOrMethodDLLImport(const CXXRecordDecl *RD) { + if (RD->hasAttr()) + return true; + for (const CXXMethodDecl *MD : RD->methods()) + if (MD->hasAttr()) + return true; + return false; +} + void CGDebugInfo::completeClassData(const RecordDecl *RD) { + if (auto *CXXRD = dyn_cast(RD)) + if (CXXRD->isDynamicClass() && + CGM.getVTableLinkage(CXXRD) == + llvm::GlobalValue::AvailableExternallyLinkage && + !isClassOrMethodDLLImport(CXXRD)) + return; + completeClass(RD); +} + +void CGDebugInfo::completeClass(const RecordDecl *RD) { if (DebugKind <= codegenoptions::DebugLineTablesOnly) return; QualType Ty = CGM.getContext().getRecordType(RD); @@ -1729,16 +1816,6 @@ static bool isDefinedInClangModule(const RecordDecl *RD) { return true; } -/// Return true if the class or any of its methods are marked dllimport. -static bool isClassOrMethodDLLImport(const CXXRecordDecl *RD) { - if (RD->hasAttr()) - return true; - for (const CXXMethodDecl *MD : RD->methods()) - if (MD->hasAttr()) - return true; - return false; -} - static bool shouldOmitDefinition(codegenoptions::DebugInfoKind DebugKind, bool DebugTypeExtRefs, const RecordDecl *RD, const LangOptions &LangOpts) { @@ -2377,6 +2454,21 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const EnumType *Ty) { FullName); } +llvm::DIMacro *CGDebugInfo::CreateMacro(llvm::DIMacroFile *Parent, + unsigned MType, SourceLocation LineLoc, + StringRef Name, StringRef Value) { + unsigned Line = LineLoc.isInvalid() ? 0 : getLineNumber(LineLoc); + return DBuilder.createMacro(Parent, Line, MType, Name, Value); +} + +llvm::DIMacroFile *CGDebugInfo::CreateTempMacroFile(llvm::DIMacroFile *Parent, + SourceLocation LineLoc, + SourceLocation FileLoc) { + llvm::DIFile *FName = getOrCreateFile(FileLoc); + unsigned Line = LineLoc.isInvalid() ? 
0 : getLineNumber(LineLoc); + return DBuilder.createTempMacroFile(Parent, Line, FName); +} + static QualType UnwrapTypeForDebugInfo(QualType T, const ASTContext &C) { Qualifiers Quals; do { @@ -2420,8 +2512,9 @@ static QualType UnwrapTypeForDebugInfo(QualType T, const ASTContext &C) { case Type::SubstTemplateTypeParm: T = cast(T)->getReplacementType(); break; - case Type::Auto: { - QualType DT = cast(T)->getDeducedType(); + case Type::Auto: + case Type::DeducedTemplateSpecialization: { + QualType DT = cast(T)->getDeducedType(); assert(!DT.isNull() && "Undeduced types shouldn't reach here."); T = DT; break; @@ -2587,6 +2680,7 @@ llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit) { case Type::Attributed: case Type::Adjusted: case Type::Decayed: + case Type::DeducedTemplateSpecialization: case Type::Elaborated: case Type::Paren: case Type::SubstTemplateTypeParm: @@ -2743,9 +2837,10 @@ void CGDebugInfo::collectFunctionDeclProps(GlobalDecl GD, llvm::DIFile *Unit, } // No need to replicate the linkage name if it isn't different from the // subprogram name, no need to have it at all unless coverage is enabled or - // debug is set to more than just line tables. + // debug is set to more than just line tables or extra debug info is needed. if (LinkageName == Name || (!CGM.getCodeGenOpts().EmitGcovArcs && !CGM.getCodeGenOpts().EmitGcovNotes && + !CGM.getCodeGenOpts().DebugInfoForProfiling && DebugKind <= codegenoptions::DebugLineTablesOnly)) LinkageName = StringRef(); @@ -2813,28 +2908,40 @@ void CGDebugInfo::collectVarDeclProps(const VarDecl *VD, llvm::DIFile *&Unit, VDContext = getContextDescriptor(cast(DC), Mod ? Mod : TheCU); } -llvm::DISubprogram * -CGDebugInfo::getFunctionForwardDeclaration(const FunctionDecl *FD) { +llvm::DISubprogram *CGDebugInfo::getFunctionFwdDeclOrStub(GlobalDecl GD, + bool Stub) { llvm::DINodeArray TParamsArray; StringRef Name, LinkageName; llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero; - SourceLocation Loc = FD->getLocation(); + SourceLocation Loc = GD.getDecl()->getLocation(); llvm::DIFile *Unit = getOrCreateFile(Loc); llvm::DIScope *DContext = Unit; unsigned Line = getLineNumber(Loc); - - collectFunctionDeclProps(FD, Unit, Name, LinkageName, DContext, + collectFunctionDeclProps(GD, Unit, Name, LinkageName, DContext, TParamsArray, Flags); + auto *FD = dyn_cast(GD.getDecl()); + // Build function type. 
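The merged Auto/DeducedTemplateSpecialization switch arm in UnwrapTypeForDebugInfo above works because both types derive from a common base that exposes getDeducedType(). Schematically (a hypothetical mini-hierarchy, not the real clang classes):

    #include <cassert>

    // Stand-ins for clang's DeducedType hierarchy (illustrative only).
    struct QualType { void *Ptr = nullptr; bool isNull() const { return !Ptr; } };
    struct DeducedType {
      QualType Deduced;
      QualType getDeducedType() const { return Deduced; }
    };
    struct AutoType : DeducedType {};
    struct DeducedTemplateSpecializationType : DeducedType {};

    // One switch arm serves both cases by casting to the shared base.
    QualType unwrapDeduced(const DeducedType *T) {
      QualType DT = T->getDeducedType();
      assert(!DT.isNull() && "Undeduced types shouldn't reach here.");
      return DT;
    }
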
SmallVector ArgTypes; - for (const ParmVarDecl *Parm: FD->parameters()) - ArgTypes.push_back(Parm->getType()); + if (FD) + for (const ParmVarDecl *Parm : FD->parameters()) + ArgTypes.push_back(Parm->getType()); CallingConv CC = FD->getType()->castAs()->getCallConv(); QualType FnType = CGM.getContext().getFunctionType( FD->getReturnType(), ArgTypes, FunctionProtoType::ExtProtoInfo(CC)); + if (Stub) { + return DBuilder.createFunction( + DContext, Name, LinkageName, Unit, Line, + getOrCreateFunctionType(GD.getDecl(), FnType, Unit), + !FD->isExternallyVisible(), + /* isDefinition = */ true, 0, Flags, CGM.getLangOpts().Optimize, + TParamsArray.get(), getFunctionDeclaration(FD)); + } + llvm::DISubprogram *SP = DBuilder.createTempFunctionFwdDecl( DContext, Name, LinkageName, Unit, Line, - getOrCreateFunctionType(FD, FnType, Unit), !FD->isExternallyVisible(), + getOrCreateFunctionType(GD.getDecl(), FnType, Unit), + !FD->isExternallyVisible(), /* isDefinition = */ false, 0, Flags, CGM.getLangOpts().Optimize, TParamsArray.get(), getFunctionDeclaration(FD)); const auto *CanonDecl = cast(FD->getCanonicalDecl()); @@ -2844,6 +2951,16 @@ CGDebugInfo::getFunctionForwardDeclaration(const FunctionDecl *FD) { return SP; } +llvm::DISubprogram * +CGDebugInfo::getFunctionForwardDeclaration(GlobalDecl GD) { + return getFunctionFwdDeclOrStub(GD, /* Stub = */ false); +} + +llvm::DISubprogram * +CGDebugInfo::getFunctionStub(GlobalDecl GD) { + return getFunctionFwdDeclOrStub(GD, /* Stub = */ true); +} + llvm::DIGlobalVariable * CGDebugInfo::getGlobalVariableForwardDeclaration(const VarDecl *VD) { QualType T; @@ -2857,7 +2974,7 @@ CGDebugInfo::getGlobalVariableForwardDeclaration(const VarDecl *VD) { auto Align = getDeclAlignIfRequired(VD, CGM.getContext()); auto *GV = DBuilder.createTempGlobalVariableFwdDecl( DContext, Name, LinkageName, Unit, Line, getOrCreateType(T, Unit), - !VD->isExternallyVisible(), nullptr, nullptr, Align); + !VD->isExternallyVisible(), nullptr, Align); FwdDeclReplaceMap.emplace_back( std::piecewise_construct, std::make_tuple(cast(VD->getCanonicalDecl())), @@ -2875,8 +2992,12 @@ llvm::DINode *CGDebugInfo::getDeclarationOrDefinition(const Decl *D) { getOrCreateFile(TD->getLocation())); auto I = DeclCache.find(D->getCanonicalDecl()); - if (I != DeclCache.end()) - return dyn_cast_or_null(I->second); + if (I != DeclCache.end()) { + auto N = I->second; + if (auto *GVE = dyn_cast_or_null(N)) + return GVE->getVariable(); + return dyn_cast_or_null(N); + } // No definition for now. Emit a forward definition that might be // merged with a potential upcoming definition. @@ -3111,6 +3232,27 @@ void CGDebugInfo::EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc, TParamsArray.get(), getFunctionDeclaration(D))); } +void CGDebugInfo::EmitInlineFunctionStart(CGBuilderTy &Builder, GlobalDecl GD) { + const auto *FD = cast(GD.getDecl()); + // If there is a subprogram for this function available then use it. 
+ auto FI = SPCache.find(FD->getCanonicalDecl()); + llvm::DISubprogram *SP = nullptr; + if (FI != SPCache.end()) + SP = dyn_cast_or_null(FI->second); + if (!SP) + SP = getFunctionStub(GD); + FnBeginRegionCount.push_back(LexicalBlockStack.size()); + LexicalBlockStack.emplace_back(SP); + setInlinedAt(Builder.getCurrentDebugLocation()); + EmitLocation(Builder, FD->getLocation()); +} + +void CGDebugInfo::EmitInlineFunctionEnd(CGBuilderTy &Builder) { + assert(CurInlinedAt && "unbalanced inline scope stack"); + EmitFunctionEnd(Builder); + setInlinedAt(llvm::DebugLoc(CurInlinedAt).getInlinedAt()); +} + void CGDebugInfo::EmitLocation(CGBuilderTy &Builder, SourceLocation Loc) { // Update our current location setLocation(Loc); @@ -3120,7 +3262,7 @@ void CGDebugInfo::EmitLocation(CGBuilderTy &Builder, SourceLocation Loc) { llvm::MDNode *Scope = LexicalBlockStack.back(); Builder.SetCurrentDebugLocation(llvm::DebugLoc::get( - getLineNumber(CurLoc), getColumnNumber(CurLoc), Scope)); + getLineNumber(CurLoc), getColumnNumber(CurLoc), Scope, CurInlinedAt)); } void CGDebugInfo::CreateLexicalBlock(SourceLocation Loc) { @@ -3132,14 +3274,29 @@ void CGDebugInfo::CreateLexicalBlock(SourceLocation Loc) { getColumnNumber(CurLoc))); } +void CGDebugInfo::AppendAddressSpaceXDeref( + unsigned AddressSpace, + SmallVectorImpl &Expr) const { + Optional DWARFAddressSpace = + CGM.getTarget().getDWARFAddressSpace(AddressSpace); + if (!DWARFAddressSpace) + return; + + Expr.push_back(llvm::dwarf::DW_OP_constu); + Expr.push_back(DWARFAddressSpace.getValue()); + Expr.push_back(llvm::dwarf::DW_OP_swap); + Expr.push_back(llvm::dwarf::DW_OP_xderef); +} + void CGDebugInfo::EmitLexicalBlockStart(CGBuilderTy &Builder, SourceLocation Loc) { // Set our current location. setLocation(Loc); // Emit a line table change for the current location inside the new scope. - Builder.SetCurrentDebugLocation(llvm::DebugLoc::get( - getLineNumber(Loc), getColumnNumber(Loc), LexicalBlockStack.back())); + Builder.SetCurrentDebugLocation( + llvm::DebugLoc::get(getLineNumber(Loc), getColumnNumber(Loc), + LexicalBlockStack.back(), CurInlinedAt)); if (DebugKind <= codegenoptions::DebugLineTablesOnly) return; @@ -3281,13 +3438,16 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage, Line = getLineNumber(VD->getLocation()); Column = getColumnNumber(VD->getLocation()); } - SmallVector Expr; + SmallVector Expr; llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero; if (VD->isImplicit()) Flags |= llvm::DINode::FlagArtificial; auto Align = getDeclAlignIfRequired(VD, CGM.getContext()); + unsigned AddressSpace = CGM.getContext().getTargetAddressSpace(VD->getType()); + AppendAddressSpaceXDeref(AddressSpace, Expr); + // If this is the first argument and it is implicit then // give it an object pointer flag. // FIXME: There has to be a better way to do this, but for static @@ -3325,9 +3485,10 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage, Line, Ty, Align); // Insert an llvm.dbg.declare into the current block. 
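AppendAddressSpaceXDeref above is worth a gloss: the variable's address is already on the DWARF expression stack, so the helper pushes the target-specific address space, swaps it under the address, and lets DW_OP_xderef pop both to perform an address-space-qualified load. A commented standalone model (the opcode values are the standard DWARF encodings, normally taken from llvm's Dwarf.h):

    #include <cstdint>
    #include <vector>

    enum : uint64_t { DW_OP_constu = 0x10, DW_OP_swap = 0x16, DW_OP_xderef = 0x18 };

    // Stack before: [addr]
    //   constu AS -> [addr, AS]
    //   swap      -> [AS, addr]
    //   xderef    -> pops addr and AS, pushes the value loaded from
    //                address 'addr' in address space 'AS'
    void appendXDeref(std::vector<uint64_t> &Expr, uint64_t DwarfAS) {
      Expr.push_back(DW_OP_constu);
      Expr.push_back(DwarfAS);
      Expr.push_back(DW_OP_swap);
      Expr.push_back(DW_OP_xderef);
    }
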
- DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(Expr), - llvm::DebugLoc::get(Line, Column, Scope), - Builder.GetInsertBlock()); + DBuilder.insertDeclare( + Storage, D, DBuilder.createExpression(Expr), + llvm::DebugLoc::get(Line, Column, Scope, CurInlinedAt), + Builder.GetInsertBlock()); return; } else if (isa(VD->getType())) Expr.push_back(llvm::dwarf::DW_OP_deref); @@ -3358,9 +3519,10 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage, Flags | llvm::DINode::FlagArtificial, FieldAlign); // Insert an llvm.dbg.declare into the current block. - DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(Expr), - llvm::DebugLoc::get(Line, Column, Scope), - Builder.GetInsertBlock()); + DBuilder.insertDeclare( + Storage, D, DBuilder.createExpression(Expr), + llvm::DebugLoc::get(Line, Column, Scope, CurInlinedAt), + Builder.GetInsertBlock()); } } } @@ -3376,7 +3538,7 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage, // Insert an llvm.dbg.declare into the current block. DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(Expr), - llvm::DebugLoc::get(Line, Column, Scope), + llvm::DebugLoc::get(Line, Column, Scope, CurInlinedAt), Builder.GetInsertBlock()); } @@ -3457,7 +3619,8 @@ void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable( Line, Ty, false, llvm::DINode::FlagZero, Align); // Insert an llvm.dbg.declare into the current block. - auto DL = llvm::DebugLoc::get(Line, Column, LexicalBlockStack.back()); + auto DL = + llvm::DebugLoc::get(Line, Column, LexicalBlockStack.back(), CurInlinedAt); if (InsertPoint) DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(addr), DL, InsertPoint); @@ -3625,12 +3788,13 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, // Insert an llvm.dbg.value into the current block. DBuilder.insertDbgValueIntrinsic( LocalAddr, 0, debugVar, DBuilder.createExpression(), - llvm::DebugLoc::get(line, column, scope), Builder.GetInsertBlock()); + llvm::DebugLoc::get(line, column, scope, CurInlinedAt), + Builder.GetInsertBlock()); } // Insert an llvm.dbg.declare into the current block. DBuilder.insertDeclare(Arg, debugVar, DBuilder.createExpression(), - llvm::DebugLoc::get(line, column, scope), + llvm::DebugLoc::get(line, column, scope, CurInlinedAt), Builder.GetInsertBlock()); } @@ -3652,10 +3816,10 @@ CGDebugInfo::getOrCreateStaticDataMemberDeclarationOrNull(const VarDecl *D) { return CreateRecordStaticField(D, Ctxt, cast(DC)); } -llvm::DIGlobalVariable *CGDebugInfo::CollectAnonRecordDecls( +llvm::DIGlobalVariableExpression *CGDebugInfo::CollectAnonRecordDecls( const RecordDecl *RD, llvm::DIFile *Unit, unsigned LineNo, StringRef LinkageName, llvm::GlobalVariable *Var, llvm::DIScope *DContext) { - llvm::DIGlobalVariable *GV = nullptr; + llvm::DIGlobalVariableExpression *GVE = nullptr; for (const auto *Field : RD->fields()) { llvm::DIType *FieldTy = getOrCreateType(Field->getType(), Unit); @@ -3664,16 +3828,17 @@ llvm::DIGlobalVariable *CGDebugInfo::CollectAnonRecordDecls( // Ignore unnamed fields, but recurse into anonymous records. if (FieldName.empty()) { if (const auto *RT = dyn_cast(Field->getType())) - GV = CollectAnonRecordDecls(RT->getDecl(), Unit, LineNo, LinkageName, + GVE = CollectAnonRecordDecls(RT->getDecl(), Unit, LineNo, LinkageName, Var, DContext); continue; } // Use VarDecl's Tag, Scope and Line number. 
- GV = DBuilder.createGlobalVariable(DContext, FieldName, LinkageName, Unit, - LineNo, FieldTy, Var->hasLocalLinkage()); - Var->addDebugInfo(GV); + GVE = DBuilder.createGlobalVariableExpression( + DContext, FieldName, LinkageName, Unit, LineNo, FieldTy, + Var->hasLocalLinkage()); + Var->addDebugInfo(GVE); } - return GV; + return GVE; } void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, @@ -3686,7 +3851,8 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, // it to the llvm::GlobalVariable. auto Cached = DeclCache.find(D->getCanonicalDecl()); if (Cached != DeclCache.end()) - return Var->addDebugInfo(cast(Cached->second)); + return Var->addDebugInfo( + cast(Cached->second)); // Create global variable debug descriptor. llvm::DIFile *Unit = nullptr; @@ -3698,7 +3864,7 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, // Attempt to store one global variable for the declaration - even if we // emit a lot of fields. - llvm::DIGlobalVariable *GV = nullptr; + llvm::DIGlobalVariableExpression *GVE = nullptr; // If this is an anonymous union then we'll want to emit a global // variable for each member of the anonymous union so that it's possible @@ -3707,16 +3873,23 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, const RecordDecl *RD = T->castAs()->getDecl(); assert(RD->isAnonymousStructOrUnion() && "unnamed non-anonymous struct or union?"); - GV = CollectAnonRecordDecls(RD, Unit, LineNo, LinkageName, Var, DContext); + GVE = CollectAnonRecordDecls(RD, Unit, LineNo, LinkageName, Var, DContext); } else { auto Align = getDeclAlignIfRequired(D, CGM.getContext()); - GV = DBuilder.createGlobalVariable( + + SmallVector Expr; + unsigned AddressSpace = + CGM.getContext().getTargetAddressSpace(D->getType()); + AppendAddressSpaceXDeref(AddressSpace, Expr); + + GVE = DBuilder.createGlobalVariableExpression( DContext, DeclName, LinkageName, Unit, LineNo, getOrCreateType(T, Unit), - Var->hasLocalLinkage(), /*Expr=*/nullptr, + Var->hasLocalLinkage(), + Expr.empty() ? nullptr : DBuilder.createExpression(Expr), getOrCreateStaticDataMemberDeclarationOrNull(D), Align); - Var->addDebugInfo(GV); + Var->addDebugInfo(GVE); } - DeclCache[D->getCanonicalDecl()].reset(GV); + DeclCache[D->getCanonicalDecl()].reset(GVE); } void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, const APValue &Init) { @@ -3761,13 +3934,16 @@ void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, const APValue &Init) { if (GV) return; llvm::DIExpression *InitExpr = nullptr; - if (Init.isInt()) - InitExpr = - DBuilder.createConstantValueExpression(Init.getInt().getExtValue()); - else if (Init.isFloat() && CGM.getContext().getTypeSize(VD->getType()) <= 64) - InitExpr = DBuilder.createConstantValueExpression( - Init.getFloat().bitcastToAPInt().getZExtValue()); - GV.reset(DBuilder.createGlobalVariable( + if (CGM.getContext().getTypeSize(VD->getType()) <= 64) { + // FIXME: Add a representation for integer constants wider than 64 bits. 
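The reordering above puts the 64-bit width test in front of both constant paths, so the FIXME applies uniformly to integers and floats. Reduced to its decision logic (APValue and the DIBuilder call replaced by stand-ins; C++17 for std::optional):

    #include <cstdint>
    #include <optional>

    struct ConstInit { bool IsInt = false, IsFloat = false; uint64_t Bits = 0; };

    // Only constants whose type fits in 64 bits get a constant-value
    // expression; wider ones are silently dropped for now (the FIXME).
    std::optional<uint64_t> constantValueExpr(const ConstInit &Init,
                                              unsigned TypeBits) {
      if (TypeBits > 64)
        return std::nullopt;
      if (Init.IsInt || Init.IsFloat)
        return Init.Bits;  // floats are bitcast to their integer image first
      return std::nullopt;
    }
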
+ if (Init.isInt()) + InitExpr = + DBuilder.createConstantValueExpression(Init.getInt().getExtValue()); + else if (Init.isFloat()) + InitExpr = DBuilder.createConstantValueExpression( + Init.getFloat().bitcastToAPInt().getZExtValue()); + } + GV.reset(DBuilder.createGlobalVariableExpression( DContext, Name, StringRef(), Unit, getLineNumber(VD->getLocation()), Ty, true, InitExpr, getOrCreateStaticDataMemberDeclarationOrNull(VarD), Align)); @@ -3914,6 +4090,8 @@ void CGDebugInfo::finalize() { else Repl = it->second; + if (auto *GVE = dyn_cast_or_null(Repl)) + Repl = GVE->getVariable(); DBuilder.replaceTemporary(std::move(FwdDecl), cast(Repl)); } diff --git a/tools/clang/lib/CodeGen/CodeGenTypes.cpp b/tools/clang/lib/CodeGen/CodeGenTypes.cpp index 5ed1d5a..79203e1 100644 --- a/tools/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/tools/clang/lib/CodeGen/CodeGenTypes.cpp @@ -477,7 +477,6 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { case BuiltinType::OCLEvent: case BuiltinType::OCLClkEvent: case BuiltinType::OCLQueue: - case BuiltinType::OCLNDRange: case BuiltinType::OCLReserveID: ResultType = CGM.getOpenCLRuntime().convertOpenCLSpecificType(Ty); break; @@ -492,7 +491,8 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { break; } case Type::Auto: - llvm_unreachable("Unexpected undeduced auto type!"); + case Type::DeducedTemplateSpecialization: + llvm_unreachable("Unexpected undeduced type!"); case Type::Complex: { llvm::Type *EltTy = ConvertType(cast(Ty)->getElementType()); ResultType = llvm::StructType::get(EltTy, EltTy, nullptr); @@ -742,7 +742,7 @@ CodeGenTypes::getCGRecordLayout(const RecordDecl *RD) { } bool CodeGenTypes::isPointerZeroInitializable(QualType T) { - assert (T->isAnyPointerType() && "Invalid type"); + assert((T->isAnyPointerType() || T->isBlockPointerType()) && "Invalid type"); return isZeroInitializable(T); } diff --git a/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp b/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp index fdea3fe..a7db775 100644 --- a/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -24,8 +24,8 @@ #include "CGVTables.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" -#include "ConstantBuilder.h" #include "TargetInfo.h" +#include "clang/CodeGen/ConstantInitBuilder.h" #include "clang/AST/Mangle.h" #include "clang/AST/Type.h" #include "clang/AST/StmtCXX.h" @@ -207,8 +207,9 @@ class ItaniumCXXABI : public CodeGen::CGCXXABI { void EmitCXXConstructors(const CXXConstructorDecl *D) override; - void buildStructorSignature(const CXXMethodDecl *MD, StructorType T, - SmallVectorImpl &ArgTys) override; + AddedStructorArgs + buildStructorSignature(const CXXMethodDecl *MD, StructorType T, + SmallVectorImpl &ArgTys) override; bool useThunkForDtorVariant(const CXXDestructorDecl *Dtor, CXXDtorType DT) const override { @@ -225,11 +226,10 @@ class ItaniumCXXABI : public CodeGen::CGCXXABI { void EmitInstanceFunctionProlog(CodeGenFunction &CGF) override; - unsigned addImplicitConstructorArgs(CodeGenFunction &CGF, - const CXXConstructorDecl *D, - CXXCtorType Type, bool ForVirtualBase, - bool Delegating, - CallArgList &Args) override; + AddedStructorArgs + addImplicitConstructorArgs(CodeGenFunction &CGF, const CXXConstructorDecl *D, + CXXCtorType Type, bool ForVirtualBase, + bool Delegating, CallArgList &Args) override; void EmitDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *DD, CXXDtorType Type, bool ForVirtualBase, @@ -366,11 +366,12 @@ class ItaniumCXXABI : public CodeGen::CGCXXABI { void emitCXXStructor(const 
CXXMethodDecl *MD, StructorType Type) override; private: - bool hasAnyUsedVirtualInlineFunction(const CXXRecordDecl *RD) const { + bool hasAnyVirtualInlineFunction(const CXXRecordDecl *RD) const { const auto &VtableLayout = CGM.getItaniumVTableContext().getVTableLayout(RD); for (const auto &VtableComponent : VtableLayout.vtable_components()) { + // Skip empty slot. if (!VtableComponent.isUsedFunctionPointerKind()) continue; @@ -1352,7 +1353,7 @@ void ItaniumCXXABI::EmitCXXConstructors(const CXXConstructorDecl *D) { } } -void +CGCXXABI::AddedStructorArgs ItaniumCXXABI::buildStructorSignature(const CXXMethodDecl *MD, StructorType T, SmallVectorImpl &ArgTys) { ASTContext &Context = getContext(); @@ -1361,9 +1362,12 @@ ItaniumCXXABI::buildStructorSignature(const CXXMethodDecl *MD, StructorType T, // These are Clang types, so we don't need to worry about sret yet. // Check if we need to add a VTT parameter (which has type void **). - if (T == StructorType::Base && MD->getParent()->getNumVBases() != 0) + if (T == StructorType::Base && MD->getParent()->getNumVBases() != 0) { ArgTys.insert(ArgTys.begin() + 1, Context.getPointerType(Context.VoidPtrTy)); + return AddedStructorArgs::prefix(1); + } + return AddedStructorArgs{}; } void ItaniumCXXABI::EmitCXXDestructors(const CXXDestructorDecl *D) { @@ -1428,11 +1432,11 @@ void ItaniumCXXABI::EmitInstanceFunctionProlog(CodeGenFunction &CGF) { CGF.Builder.CreateStore(getThisValue(CGF), CGF.ReturnValue); } -unsigned ItaniumCXXABI::addImplicitConstructorArgs( +CGCXXABI::AddedStructorArgs ItaniumCXXABI::addImplicitConstructorArgs( CodeGenFunction &CGF, const CXXConstructorDecl *D, CXXCtorType Type, bool ForVirtualBase, bool Delegating, CallArgList &Args) { if (!NeedsVTTParameter(GlobalDecl(D, Type))) - return 0; + return AddedStructorArgs{}; // Insert the implicit 'vtt' argument as the second argument. llvm::Value *VTT = @@ -1440,7 +1444,7 @@ unsigned ItaniumCXXABI::addImplicitConstructorArgs( QualType VTTTy = getContext().getPointerType(getContext().VoidPtrTy); Args.insert(Args.begin() + 1, CallArg(RValue::get(VTT), VTTTy, /*needscopy=*/false)); - return 1; // Added one arg. + return AddedStructorArgs::prefix(1); // Added one arg. } void ItaniumCXXABI::EmitDestructorCall(CodeGenFunction &CGF, @@ -1687,7 +1691,7 @@ bool ItaniumCXXABI::canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const { // then we are safe to emit available_externally copy of vtable. // FIXME we can still emit a copy of the vtable if we // can emit definition of the inline functions. - return !hasAnyUsedVirtualInlineFunction(RD) && !isVTableHidden(RD); + return !hasAnyVirtualInlineFunction(RD) && !isVTableHidden(RD); } static llvm::Value *performTypeAdjustment(CodeGenFunction &CGF, Address InitialPtr, @@ -2014,10 +2018,11 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, // The ABI says: "It is suggested that it be emitted in the same COMDAT // group as the associated data object." In practice, this doesn't work for - // non-ELF object formats, so only do it for ELF. + // non-ELF and non-Wasm object formats, so only do it for ELF and Wasm. 
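The AddedStructorArgs values returned by the buildStructorSignature and addImplicitConstructorArgs hunks above replace the old bare unsigned count, so callers can tell how many implicit arguments were inserted before the explicit parameters (the VTT here) versus appended after them (as the Microsoft ABI does). The result type has roughly this shape; treat it as a paraphrased sketch rather than a quote of CGCXXABI.h:

    // Sketch of the result type; the real definition lives in CGCXXABI.h.
    struct AddedStructorArgs {
      unsigned Prefix = 0;  // implicit args inserted right after 'this'
      unsigned Suffix = 0;  // implicit args appended at the end
      static AddedStructorArgs prefix(unsigned N) { return {N, 0}; }
      static AddedStructorArgs suffix(unsigned N) { return {0, N}; }
    };
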
llvm::Comdat *C = var->getComdat(); if (!D.isLocalVarDecl() && C && - CGM.getTarget().getTriple().isOSBinFormatELF()) { + (CGM.getTarget().getTriple().isOSBinFormatELF() || + CGM.getTarget().getTriple().isOSBinFormatWasm())) { guard->setComdat(C); // An inline variable's guard function is run from the per-TU // initialization function, not via a dedicated global ctor function, so @@ -2160,7 +2165,9 @@ static void emitGlobalDtorWithCXAAtExit(CodeGenFunction &CGF, // Create a variable that binds the atexit to this shared object. llvm::Constant *handle = - CGF.CGM.CreateRuntimeVariable(CGF.Int8Ty, "__dso_handle"); + CGF.CGM.CreateRuntimeVariable(CGF.Int8Ty, "__dso_handle"); + auto *GV = cast(handle->stripPointerCasts()); + GV->setVisibility(llvm::GlobalValue::HiddenVisibility); llvm::Value *args[] = { llvm::ConstantExpr::getBitCast(dtor, dtorTy), @@ -2271,7 +2278,21 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( ArrayRef CXXThreadLocalInits, ArrayRef CXXThreadLocalInitVars) { llvm::Function *InitFunc = nullptr; - if (!CXXThreadLocalInits.empty()) { + + // Separate initializers into those with ordered (or partially-ordered) + // initialization and those with unordered initialization. + llvm::SmallVector OrderedInits; + llvm::SmallDenseMap UnorderedInits; + for (unsigned I = 0; I != CXXThreadLocalInits.size(); ++I) { + if (isTemplateInstantiation( + CXXThreadLocalInitVars[I]->getTemplateSpecializationKind())) + UnorderedInits[CXXThreadLocalInitVars[I]->getCanonicalDecl()] = + CXXThreadLocalInits[I]; + else + OrderedInits.push_back(CXXThreadLocalInits[I]); + } + + if (!OrderedInits.empty()) { // Generate a guarded initialization function. llvm::FunctionType *FTy = llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false); @@ -2288,24 +2309,28 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( CharUnits GuardAlign = CharUnits::One(); Guard->setAlignment(GuardAlign.getQuantity()); - CodeGenFunction(CGM) - .GenerateCXXGlobalInitFunc(InitFunc, CXXThreadLocalInits, - Address(Guard, GuardAlign)); + CodeGenFunction(CGM).GenerateCXXGlobalInitFunc(InitFunc, OrderedInits, + Address(Guard, GuardAlign)); // On Darwin platforms, use CXX_FAST_TLS calling convention. if (CGM.getTarget().getTriple().isOSDarwin()) { InitFunc->setCallingConv(llvm::CallingConv::CXX_FAST_TLS); InitFunc->addFnAttr(llvm::Attribute::NoUnwind); } } + + // Emit thread wrappers. for (const VarDecl *VD : CXXThreadLocals) { llvm::GlobalVariable *Var = cast(CGM.GetGlobalValue(CGM.getMangledName(VD))); + llvm::Function *Wrapper = getOrCreateThreadLocalWrapper(VD, Var); // Some targets require that all access to thread local variables go through // the thread wrapper. This means that we cannot attempt to create a thread // wrapper or a thread helper. - if (isThreadWrapperReplaceable(VD, CGM) && !VD->hasDefinition()) + if (isThreadWrapperReplaceable(VD, CGM) && !VD->hasDefinition()) { + Wrapper->setLinkage(llvm::Function::ExternalLinkage); continue; + } // Mangle the name for the thread_local initialization function. 
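The split performed above gives each thread_local template instantiation its own unordered, per-variable init function, while everything else stays on the single ordered init path. The partition itself reduces to the following (stand-in types; the real code keys on getTemplateSpecializationKind()):

    #include <map>
    #include <vector>

    struct Var { bool IsTemplateInstantiation = false; };
    struct InitFn {};

    // Instantiations may be initialized in any order, so each keeps its own
    // initializer; the rest share one guarded, ordered init function.
    void partitionTLSInits(const std::vector<const Var *> &Vars,
                           const std::vector<InitFn *> &Inits,
                           std::vector<InitFn *> &Ordered,
                           std::map<const Var *, InitFn *> &Unordered) {
      for (size_t I = 0; I != Vars.size(); ++I) {
        if (Vars[I]->IsTemplateInstantiation)
          Unordered[Vars[I]] = Inits[I];
        else
          Ordered.push_back(Inits[I]);
      }
    }
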
SmallString<256> InitFnName; @@ -2321,18 +2346,21 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( bool InitIsInitFunc = false; if (VD->hasDefinition()) { InitIsInitFunc = true; - if (InitFunc) + llvm::Function *InitFuncToUse = InitFunc; + if (isTemplateInstantiation(VD->getTemplateSpecializationKind())) + InitFuncToUse = UnorderedInits.lookup(VD->getCanonicalDecl()); + if (InitFuncToUse) Init = llvm::GlobalAlias::create(Var->getLinkage(), InitFnName.str(), - InitFunc); + InitFuncToUse); } else { // Emit a weak global function referring to the initialization function. // This function will not exist if the TU defining the thread_local // variable in question does not need any dynamic initialization for // its thread_local variables. llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, false); - Init = llvm::Function::Create( - FnTy, llvm::GlobalVariable::ExternalWeakLinkage, InitFnName.str(), - &CGM.getModule()); + Init = llvm::Function::Create(FnTy, + llvm::GlobalVariable::ExternalWeakLinkage, + InitFnName.str(), &CGM.getModule()); const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); CGM.SetLLVMFunctionAttributes(nullptr, FI, cast(Init)); } @@ -2340,7 +2368,6 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( if (Init) Init->setVisibility(Var->getVisibility()); - llvm::Function *Wrapper = getOrCreateThreadLocalWrapper(VD, Var); llvm::LLVMContext &Context = CGM.getModule().getContext(); llvm::BasicBlock *Entry = llvm::BasicBlock::Create(Context, "", Wrapper); CGBuilderTy Builder(CGM, Entry); @@ -2615,7 +2642,6 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) { case BuiltinType::OCLEvent: case BuiltinType::OCLClkEvent: case BuiltinType::OCLQueue: - case BuiltinType::OCLNDRange: case BuiltinType::OCLReserveID: return false; @@ -2795,7 +2821,8 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) { llvm_unreachable("References shouldn't get here"); case Type::Auto: - llvm_unreachable("Undeduced auto type shouldn't get here"); + case Type::DeducedTemplateSpecialization: + llvm_unreachable("Undeduced type shouldn't get here"); case Type::Pipe: llvm_unreachable("Pipe types shouldn't get here"); @@ -3025,7 +3052,8 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(QualType Ty, bool Force, llvm_unreachable("References shouldn't get here"); case Type::Auto: - llvm_unreachable("Undeduced auto type shouldn't get here"); + case Type::DeducedTemplateSpecialization: + llvm_unreachable("Undeduced type shouldn't get here"); case Type::Pipe: llvm_unreachable("Pipe type shouldn't get here"); @@ -3515,8 +3543,9 @@ static StructorCodegen getCodegenToUse(CodeGenModule &CGM, return StructorCodegen::RAUW; if (llvm::GlobalValue::isWeakForLinker(Linkage)) { - // Only ELF supports COMDATs with arbitrary names (C5/D5). - if (CGM.getTarget().getTriple().isOSBinFormatELF()) + // Only ELF and wasm support COMDATs with arbitrary names (C5/D5). + if (CGM.getTarget().getTriple().isOSBinFormatELF() || + CGM.getTarget().getTriple().isOSBinFormatWasm()) return StructorCodegen::COMDAT; return StructorCodegen::Emit; } diff --git a/tools/clang/lib/CodeGen/TargetInfo.cpp b/tools/clang/lib/CodeGen/TargetInfo.cpp index 6ea4cff..d38bf4c 100644 --- a/tools/clang/lib/CodeGen/TargetInfo.cpp +++ b/tools/clang/lib/CodeGen/TargetInfo.cpp @@ -871,6 +871,14 @@ static bool isX86VectorCallAggregateSmallEnough(uint64_t NumMembers) { return NumMembers <= 4; } +/// Returns a Homogeneous Vector Aggregate ABIArgInfo, used in X86. 
+static ABIArgInfo getDirectX86Hva(llvm::Type* T = nullptr) { + auto AI = ABIArgInfo::getDirect(T); + AI.setInReg(true); + AI.setCanBeFlattened(false); + return AI; +} + //===----------------------------------------------------------------------===// // X86-32 ABI Implementation //===----------------------------------------------------------------------===// @@ -884,6 +892,11 @@ struct CCState { unsigned FreeSSERegs; }; +enum { + // Vectorcall only allows the first 6 parameters to be passed in registers. + VectorcallMaxParamNumAsReg = 6 +}; + /// X86_32ABIInfo - The X86-32 ABI information. class X86_32ABIInfo : public SwiftABIInfo { enum Class { @@ -929,6 +942,8 @@ class X86_32ABIInfo : public SwiftABIInfo { Class classify(QualType Ty) const; ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const; ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const; + ABIArgInfo reclassifyHvaArgType(QualType RetTy, CCState &State, + const ABIArgInfo& current) const; /// \brief Updates the number of available free registers, returns /// true if any registers were allocated. bool updateFreeRegs(QualType Ty, CCState &State) const; @@ -946,6 +961,8 @@ class X86_32ABIInfo : public SwiftABIInfo { void addFieldToArgStruct(SmallVector &FrameFields, CharUnits &StackOffset, ABIArgInfo &Info, QualType Type) const; + void computeVectorCallArgs(CGFunctionInfo &FI, CCState &State, + bool &UsedInAlloca) const; public: @@ -1180,6 +1197,39 @@ static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) { return Size == 32 || Size == 64; } +static bool addFieldSizes(ASTContext &Context, const RecordDecl *RD, + uint64_t &Size) { + for (const auto *FD : RD->fields()) { + // Scalar arguments on the stack get 4 byte alignment on x86. If the + // argument is smaller than 32-bits, expanding the struct will create + // alignment padding. + if (!is32Or64BitBasicType(FD->getType(), Context)) + return false; + + // FIXME: Reject bit-fields wholesale; there are two problems, we don't know + // how to expand them yet, and the predicate for telling if a bitfield still + // counts as "basic" is more complicated than what we were doing previously. + if (FD->isBitField()) + return false; + + Size += Context.getTypeSize(FD->getType()); + } + return true; +} + +static bool addBaseAndFieldSizes(ASTContext &Context, const CXXRecordDecl *RD, + uint64_t &Size) { + // Don't do this if there are any non-empty bases. + for (const CXXBaseSpecifier &Base : RD->bases()) { + if (!addBaseAndFieldSizes(Context, Base.getType()->getAsCXXRecordDecl(), + Size)) + return false; + } + if (!addFieldSizes(Context, RD, Size)) + return false; + return true; +} + /// Test whether an argument type which is to be passed indirectly (on the /// stack) would have the equivalent layout if it was expanded into separate /// arguments. If so, we prefer to do the latter to avoid inhibiting @@ -1190,8 +1240,9 @@ bool X86_32ABIInfo::canExpandIndirectArgument(QualType Ty) const { if (!RT) return false; const RecordDecl *RD = RT->getDecl(); + uint64_t Size = 0; if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) { - if (!IsWin32StructABI ) { + if (!IsWin32StructABI) { // On non-Windows, we have to conservatively match our old bitcode // prototypes in order to be ABI-compatible at the bitcode level. if (!CXXRD->isCLike()) @@ -1200,30 +1251,12 @@ bool X86_32ABIInfo::canExpandIndirectArgument(QualType Ty) const { // Don't do this for dynamic classes. if (CXXRD->isDynamicClass()) return false; - // Don't do this if there are any non-empty bases. 
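The pair of helpers above exists so canExpandIndirectArgument() can walk C++ base classes as well as direct fields. The recursion, distilled into a standalone model (record layout represented by stand-in types; a bit-field is modeled here as size 0 so it trips the same bail-out):

    #include <cstdint>
    #include <vector>

    struct Rec {
      std::vector<const Rec *> Bases;
      std::vector<unsigned> FieldBits;  // per-field size in bits
    };

    // Sum base and field sizes, rejecting anything that is not a plain
    // 32- or 64-bit scalar, exactly as addBaseAndFieldSizes() does.
    bool sumBaseAndFieldSizes(const Rec &R, uint64_t &Size) {
      for (const Rec *B : R.Bases)
        if (!sumBaseAndFieldSizes(*B, Size))
          return false;
      for (unsigned Bits : R.FieldBits) {
        if (Bits != 32 && Bits != 64)
          return false;
        Size += Bits;
      }
      return true;
    }
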
- for (const CXXBaseSpecifier &Base : CXXRD->bases()) { - if (!isEmptyRecord(getContext(), Base.getType(), /*AllowArrays=*/true)) - return false; - } - } - - uint64_t Size = 0; - - for (const auto *FD : RD->fields()) { - // Scalar arguments on the stack get 4 byte alignment on x86. If the - // argument is smaller than 32-bits, expanding the struct will create - // alignment padding. - if (!is32Or64BitBasicType(FD->getType(), getContext())) + if (!addBaseAndFieldSizes(getContext(), CXXRD, Size)) return false; - - // FIXME: Reject bit-fields wholesale; there are two problems, we don't know - // how to expand them yet, and the predicate for telling if a bitfield still - // counts as "basic" is more complicated than what we were doing previously. - if (FD->isBitField()) + } else { + if (!addFieldSizes(getContext(), RD, Size)) return false; - - Size += getContext().getTypeSize(FD->getType()); } // We can do this if there was no alignment padding. @@ -1494,6 +1527,27 @@ bool X86_32ABIInfo::shouldPrimitiveUseInReg(QualType Ty, CCState &State) const { return true; } +ABIArgInfo +X86_32ABIInfo::reclassifyHvaArgType(QualType Ty, CCState &State, + const ABIArgInfo &current) const { + // Assumes vectorcall calling convention. + const Type *Base = nullptr; + uint64_t NumElts = 0; + + if (!Ty->isBuiltinType() && !Ty->isVectorType() && + isHomogeneousAggregate(Ty, Base, NumElts)) { + if (State.FreeSSERegs >= NumElts) { + // HVA types get passed directly in registers if there is room. + State.FreeSSERegs -= NumElts; + return getDirectX86Hva(); + } + // If there's no room, the HVA gets passed as a normal indirect + // structure. + return getIndirectResult(Ty, /*ByVal=*/false, State); + } + return current; +} + ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, CCState &State) const { // FIXME: Set alignment on indirect arguments. @@ -1513,19 +1567,34 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, } // vectorcall adds the concept of a homogenous vector aggregate, similar - // to other targets. + // to other targets; regcall uses some of the HVA rules. const Type *Base = nullptr; uint64_t NumElts = 0; if ((State.CC == llvm::CallingConv::X86_VectorCall || State.CC == llvm::CallingConv::X86_RegCall) && isHomogeneousAggregate(Ty, Base, NumElts)) { - if (State.FreeSSERegs >= NumElts) { - State.FreeSSERegs -= NumElts; - if (Ty->isBuiltinType() || Ty->isVectorType()) + + if (State.CC == llvm::CallingConv::X86_RegCall) { + if (State.FreeSSERegs >= NumElts) { + State.FreeSSERegs -= NumElts; + if (Ty->isBuiltinType() || Ty->isVectorType()) + return ABIArgInfo::getDirect(); + return ABIArgInfo::getExpand(); + + } + return getIndirectResult(Ty, /*ByVal=*/false, State); + } else if (State.CC == llvm::CallingConv::X86_VectorCall) { + if (State.FreeSSERegs >= NumElts && (Ty->isBuiltinType() || Ty->isVectorType())) { + // Actual floating-point types get registers the first time through if + // there are registers available. + State.FreeSSERegs -= NumElts; return ABIArgInfo::getDirect(); - return ABIArgInfo::getExpand(); + } else if (!Ty->isBuiltinType() && !Ty->isVectorType()) { + // HVA types only get registers after everything else has been + // set, so it is set as indirect for now.
+ return ABIArgInfo::getIndirect(getContext().getTypeAlignInChars(Ty)); + } } - return getIndirectResult(Ty, /*ByVal=*/false, State); } if (isAggregateTypeForABI(Ty)) { @@ -1604,6 +1673,36 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, return ABIArgInfo::getDirect(); } +void X86_32ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI, CCState &State, + bool &UsedInAlloca) const { + // Vectorcall only allows the first 6 parameters to be passed in registers, + // and homogeneous vector aggregates are only put into registers as a second + // priority. + unsigned Count = 0; + CCState ZeroState = State; + ZeroState.FreeRegs = ZeroState.FreeSSERegs = 0; + // HVAs must be done as a second priority for registers, so the deferred + // items are dealt with by going through the pattern a second time. + for (auto &I : FI.arguments()) { + if (Count < VectorcallMaxParamNumAsReg) + I.info = classifyArgumentType(I.type, State); + else + // Parameters after the 6th cannot be passed in registers, + // so pretend there are no registers left for them. + I.info = classifyArgumentType(I.type, ZeroState); + UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca); + ++Count; + } + Count = 0; + // Go through the arguments a second time to give HVAs registers if there + // are still some available. + for (auto &I : FI.arguments()) { + if (Count < VectorcallMaxParamNumAsReg) + I.info = reclassifyHvaArgType(I.type, State, I.info); + ++Count; + } +} + void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const { CCState State(FI.getCallingConvention()); if (IsMCUABI) @@ -1638,9 +1737,14 @@ void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const { ++State.FreeRegs; bool UsedInAlloca = false; - for (auto &I : FI.arguments()) { - I.info = classifyArgumentType(I.type, State); - UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca); + if (State.CC == llvm::CallingConv::X86_VectorCall) { + computeVectorCallArgs(FI, State, UsedInAlloca); + } else { + // If not vectorcall, revert to normal behavior. + for (auto &I : FI.arguments()) { + I.info = classifyArgumentType(I.type, State); + UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca); + } } // If we needed to use inalloca for any argument, do a second pass and rewrite @@ -2070,10 +2174,14 @@ class WinX86_64ABIInfo : public SwiftABIInfo { } private: - ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs, - bool IsReturnType) const; - - bool IsMingw64; + ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs, bool IsReturnType, + bool IsVectorCall, bool IsRegCall) const; + ABIArgInfo reclassifyHvaArgType(QualType Ty, unsigned &FreeSSERegs, + const ABIArgInfo &current) const; + void computeVectorCallArgs(CGFunctionInfo &FI, unsigned FreeSSERegs, + bool IsVectorCall, bool IsRegCall) const; + + bool IsMingw64; }; class X86_64TargetCodeGenInfo : public TargetCodeGenInfo { @@ -3680,8 +3788,24 @@ Address X86_64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, /*allowHigherAlign*/ false); } +ABIArgInfo +WinX86_64ABIInfo::reclassifyHvaArgType(QualType Ty, unsigned &FreeSSERegs, + const ABIArgInfo &current) const { + // Assumes vectorcall calling convention.
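Both the 32-bit computeVectorCallArgs above and the Win64 variant that follows share one shape: a first pass hands SSE registers to genuine vector and float parameters among the first six, and a second pass promotes HVAs that were provisionally left indirect, but only with whatever registers remain. A distilled standalone model (stand-in types; the real code threads CCState/ABIArgInfo through instead):

    #include <vector>

    enum class Loc { Direct, Indirect };
    struct Param { bool IsVectorOrFloat = false; bool IsHVA = false;
                   unsigned Regs = 1; Loc Result = Loc::Indirect; };

    constexpr unsigned MaxParamsInRegs = 6;  // vectorcall's register window

    void classifyVectorCall(std::vector<Param> &Params, unsigned FreeSSERegs) {
      // Pass 1: real vector/float parameters take registers in order.
      for (size_t I = 0; I != Params.size(); ++I) {
        Param &P = Params[I];
        if (I < MaxParamsInRegs && P.IsVectorOrFloat && FreeSSERegs >= P.Regs) {
          FreeSSERegs -= P.Regs;
          P.Result = Loc::Direct;
        } else {
          P.Result = Loc::Indirect;
        }
      }
      // Pass 2: HVAs are promoted only from the leftovers.
      for (size_t I = 0; I != Params.size() && I < MaxParamsInRegs; ++I) {
        Param &P = Params[I];
        if (P.IsHVA && P.Result == Loc::Indirect && FreeSSERegs >= P.Regs) {
          FreeSSERegs -= P.Regs;
          P.Result = Loc::Direct;
        }
      }
    }
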
+ const Type *Base = nullptr; + uint64_t NumElts = 0; + + if (!Ty->isBuiltinType() && !Ty->isVectorType() && + isHomogeneousAggregate(Ty, Base, NumElts) && FreeSSERegs >= NumElts) { + FreeSSERegs -= NumElts; + return getDirectX86Hva(); + } + return current; +} + ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs, - bool IsReturnType) const { + bool IsReturnType, bool IsVectorCall, + bool IsRegCall) const { if (Ty->isVoidType()) return ABIArgInfo::getIgnore(); @@ -3705,21 +3829,34 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs, } - // vectorcall adds the concept of a homogenous vector aggregate, similar to - // other targets. const Type *Base = nullptr; uint64_t NumElts = 0; - if (FreeSSERegs && isHomogeneousAggregate(Ty, Base, NumElts)) { - if (FreeSSERegs >= NumElts) { - FreeSSERegs -= NumElts; - if (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType()) + // vectorcall adds the concept of a homogenous vector aggregate, similar to + // other targets. + if ((IsVectorCall || IsRegCall) && + isHomogeneousAggregate(Ty, Base, NumElts)) { + if (IsRegCall) { + if (FreeSSERegs >= NumElts) { + FreeSSERegs -= NumElts; + if (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType()) + return ABIArgInfo::getDirect(); + return ABIArgInfo::getExpand(); + } + return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); + } else if (IsVectorCall) { + if (FreeSSERegs >= NumElts && + (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())) { + FreeSSERegs -= NumElts; return ABIArgInfo::getDirect(); - return ABIArgInfo::getExpand(); + } else if (IsReturnType) { + return ABIArgInfo::getExpand(); + } else if (!Ty->isBuiltinType() && !Ty->isVectorType()) { + // HVAs are delayed and reclassified in the 2nd step. + return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); + } } - return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); } - if (Ty->isMemberPointerType()) { // If the member pointer is represented by an LLVM int or ptr, pass it // directly. @@ -3755,6 +3892,32 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs, return ABIArgInfo::getDirect(); } +void WinX86_64ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI, + unsigned FreeSSERegs, + bool IsVectorCall, + bool IsRegCall) const { + unsigned Count = 0; + for (auto &I : FI.arguments()) { + if (Count < VectorcallMaxParamNumAsReg) + I.info = classify(I.type, FreeSSERegs, false, IsVectorCall, IsRegCall); + else { + // Since these cannot be passed in registers, pretend no registers + // are left. + unsigned ZeroSSERegsAvail = 0; + I.info = classify(I.type, /*FreeSSERegs=*/ZeroSSERegsAvail, false, + IsVectorCall, IsRegCall); + } + ++Count; + } + + Count = 0; + for (auto &I : FI.arguments()) { + if (Count < VectorcallMaxParamNumAsReg) + I.info = reclassifyHvaArgType(I.type, FreeSSERegs, I.info); + ++Count; + } +} + void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { bool IsVectorCall = FI.getCallingConvention() == llvm::CallingConv::X86_VectorCall; @@ -3770,17 +3933,24 @@ void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { } if (!getCXXABI().classifyReturnType(FI)) - FI.getReturnInfo() = classify(FI.getReturnType(), FreeSSERegs, true); + FI.getReturnInfo() = classify(FI.getReturnType(), FreeSSERegs, true, + IsVectorCall, IsRegCall); if (IsVectorCall) { // We can use up to 6 SSE register parameters with vectorcall. FreeSSERegs = 6; } else if (IsRegCall) { + // RegCall gives us 16 SSE registers, we can reuse the return registers. 
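To keep the register budgets in this function straight, the argument-side limits shown in this hunk reduce to a tiny table. A hypothetical helper capturing just that (the vectorcall return-side budget is set elsewhere and is not asserted here):

    // Argument-register budgets as used by WinX86_64ABIInfo::computeInfo().
    unsigned argSSERegBudget(bool IsVectorCall, bool IsRegCall) {
      if (IsVectorCall)
        return 6;   // plus the first-six-parameters rule above
      if (IsRegCall)
        return 16;  // regcall: argument passing reuses the return registers
      return 0;     // plain win64: HVAs are not passed in SSE registers
    }
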
+    FreeSSERegs = 16;
   }
 
-  for (auto &I : FI.arguments())
-    I.info = classify(I.type, FreeSSERegs, false);
+  if (IsVectorCall) {
+    computeVectorCallArgs(FI, FreeSSERegs, IsVectorCall, IsRegCall);
+  } else {
+    for (auto &I : FI.arguments())
+      I.info = classify(I.type, FreeSSERegs, false, IsVectorCall, IsRegCall);
+  }
+
 }
 
 Address WinX86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
@@ -4666,7 +4836,7 @@ class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
     : TargetCodeGenInfo(new AArch64ABIInfo(CGT, Kind)) {}
 
   StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
-    return "mov\tfp, fp\t\t; marker for objc_retainAutoreleaseReturnValue";
+    return "mov\tfp, fp\t\t# marker for objc_retainAutoreleaseReturnValue";
   }
 
   int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
@@ -6730,6 +6900,31 @@ MIPSTargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
   return false;
 }
 
+//===----------------------------------------------------------------------===//
+// AVR ABI Implementation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+class AVRTargetCodeGenInfo : public TargetCodeGenInfo {
+public:
+  AVRTargetCodeGenInfo(CodeGenTypes &CGT)
+    : TargetCodeGenInfo(new DefaultABIInfo(CGT)) { }
+
+  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+                           CodeGen::CodeGenModule &CGM) const override {
+    const auto *FD = dyn_cast_or_null<FunctionDecl>(D);
+    if (!FD) return;
+    auto *Fn = cast<llvm::Function>(GV);
+
+    if (FD->getAttr<AVRInterruptAttr>())
+      Fn->addFnAttr("interrupt");
+
+    if (FD->getAttr<AVRSignalAttr>())
+      Fn->addFnAttr("signal");
+  }
+};
+}
+
 //===----------------------------------------------------------------------===//
 // TCE ABI Implementation (see http://tce.cs.tut.fi). Uses mostly the defaults.
 // Currently subclassed only to implement custom OpenCL C function attribute
@@ -8270,6 +8465,9 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
   case llvm::Triple::mips64el:
     return SetCGInfo(new MIPSTargetCodeGenInfo(Types, false));
 
+  case llvm::Triple::avr:
+    return SetCGInfo(new AVRTargetCodeGenInfo(Types));
+
   case llvm::Triple::aarch64:
   case llvm::Triple::aarch64_be: {
     AArch64ABIInfo::ABIKind Kind = AArch64ABIInfo::AAPCS;
diff --git a/tools/clang/lib/Driver/Tools.cpp b/tools/clang/lib/Driver/Tools.cpp
deleted file mode 100644
index 64c7899..0000000
--- a/tools/clang/lib/Driver/Tools.cpp
+++ /dev/null
@@ -1,12162 +0,0 @@
-//===--- Tools.cpp - Tools Implementations ----------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
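As a usage sketch for the AVR hooks above (not part of the patch; semantics follow the usual avr-gcc convention):

  // Maps to LLVM fn attr "interrupt" via AVRInterruptAttr (handler runs
  // with global interrupts re-enabled on entry).
  __attribute__((interrupt)) void timer0_ovf(void);
  // Maps to LLVM fn attr "signal" via AVRSignalAttr (interrupts stay off).
  __attribute__((signal)) void adc_complete(void);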
-//
-//===----------------------------------------------------------------------===//
-
-#include "Tools.h"
-#include "InputInfo.h"
-#include "ToolChains.h"
-#include "clang/Basic/CharInfo.h"
-#include "clang/Basic/LangOptions.h"
-#include "clang/Basic/ObjCRuntime.h"
-#include "clang/Basic/Version.h"
-#include "clang/Config/config.h"
-#include "clang/Driver/Action.h"
-#include "clang/Driver/Compilation.h"
-#include "clang/Driver/Driver.h"
-#include "clang/Driver/DriverDiagnostic.h"
-#include "clang/Driver/Job.h"
-#include "clang/Driver/Options.h"
-#include "clang/Driver/SanitizerArgs.h"
-#include "clang/Driver/ToolChain.h"
-#include "clang/Driver/Util.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/Option/Arg.h"
-#include "llvm/Option/ArgList.h"
-#include "llvm/Option/Option.h"
-#include "llvm/Support/CodeGen.h"
-#include "llvm/Support/Compression.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/Host.h"
-#include "llvm/Support/Path.h"
-#include "llvm/Support/Process.h"
-#include "llvm/Support/Program.h"
-#include "llvm/Support/ScopedPrinter.h"
-#include "llvm/Support/TargetParser.h"
-#include "llvm/Support/YAMLParser.h"
-
-#ifdef LLVM_ON_UNIX
-#include <unistd.h> // For getuid().
-#endif
-
-using namespace clang::driver;
-using namespace clang::driver::tools;
-using namespace clang;
-using namespace llvm::opt;
-
-static void handleTargetFeaturesGroup(const ArgList &Args,
-                                      std::vector<StringRef> &Features,
-                                      OptSpecifier Group) {
-  for (const Arg *A : Args.filtered(Group)) {
-    StringRef Name = A->getOption().getName();
-    A->claim();
-
-    // Skip over "-m".
-    assert(Name.startswith("m") && "Invalid feature name.");
-    Name = Name.substr(1);
-
-    bool IsNegative = Name.startswith("no-");
-    if (IsNegative)
-      Name = Name.substr(3);
-    Features.push_back(Args.MakeArgString((IsNegative ? "-" : "+") + Name));
-  }
-}
-
-static const char *getSparcAsmModeForCPU(StringRef Name,
-                                         const llvm::Triple &Triple) {
-  if (Triple.getArch() == llvm::Triple::sparcv9) {
-    return llvm::StringSwitch<const char *>(Name)
-        .Case("niagara", "-Av9b")
-        .Case("niagara2", "-Av9b")
-        .Case("niagara3", "-Av9d")
-        .Case("niagara4", "-Av9d")
-        .Default("-Av9");
-  } else {
-    return llvm::StringSwitch<const char *>(Name)
-        .Case("v8", "-Av8")
-        .Case("supersparc", "-Av8")
-        .Case("sparclite", "-Asparclite")
-        .Case("f934", "-Asparclite")
-        .Case("hypersparc", "-Av8")
-        .Case("sparclite86x", "-Asparclite")
-        .Case("sparclet", "-Asparclet")
-        .Case("tsc701", "-Asparclet")
-        .Case("v9", "-Av8plus")
-        .Case("ultrasparc", "-Av8plus")
-        .Case("ultrasparc3", "-Av8plus")
-        .Case("niagara", "-Av8plusb")
-        .Case("niagara2", "-Av8plusb")
-        .Case("niagara3", "-Av8plusd")
-        .Case("niagara4", "-Av8plusd")
-        .Case("leon2", "-Av8")
-        .Case("at697e", "-Av8")
-        .Case("at697f", "-Av8")
-        .Case("leon3", "-Av8")
-        .Case("ut699", "-Av8")
-        .Case("gr712rc", "-Av8")
-        .Case("leon4", "-Av8")
-        .Case("gr740", "-Av8")
-        .Default("-Av8");
-  }
-}
-
-static void CheckPreprocessingOptions(const Driver &D, const ArgList &Args) {
-  if (Arg *A = Args.getLastArg(options::OPT_C, options::OPT_CC)) {
-    if (!Args.hasArg(options::OPT_E) && !Args.hasArg(options::OPT__SLASH_P) &&
-        !Args.hasArg(options::OPT__SLASH_EP) && !D.CCCIsCPP()) {
-      D.Diag(diag::err_drv_argument_only_allowed_with)
-          << A->getBaseArg().getAsString(Args)
-          << (D.IsCLMode() ?
"/E, /P or /EP" : "-E"); - } - } -} - -static void CheckCodeGenerationOptions(const Driver &D, const ArgList &Args) { - // In gcc, only ARM checks this, but it seems reasonable to check universally. - if (Args.hasArg(options::OPT_static)) - if (const Arg *A = - Args.getLastArg(options::OPT_dynamic, options::OPT_mdynamic_no_pic)) - D.Diag(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args) - << "-static"; -} - -// Add backslashes to escape spaces and other backslashes. -// This is used for the space-separated argument list specified with -// the -dwarf-debug-flags option. -static void EscapeSpacesAndBackslashes(const char *Arg, - SmallVectorImpl &Res) { - for (; *Arg; ++Arg) { - switch (*Arg) { - default: - break; - case ' ': - case '\\': - Res.push_back('\\'); - break; - } - Res.push_back(*Arg); - } -} - -// Quote target names for inclusion in GNU Make dependency files. -// Only the characters '$', '#', ' ', '\t' are quoted. -static void QuoteTarget(StringRef Target, SmallVectorImpl &Res) { - for (unsigned i = 0, e = Target.size(); i != e; ++i) { - switch (Target[i]) { - case ' ': - case '\t': - // Escape the preceding backslashes - for (int j = i - 1; j >= 0 && Target[j] == '\\'; --j) - Res.push_back('\\'); - - // Escape the space/tab - Res.push_back('\\'); - break; - case '$': - Res.push_back('$'); - break; - case '#': - Res.push_back('\\'); - break; - default: - break; - } - - Res.push_back(Target[i]); - } -} - -static void addDirectoryList(const ArgList &Args, ArgStringList &CmdArgs, - const char *ArgName, const char *EnvVar) { - const char *DirList = ::getenv(EnvVar); - bool CombinedArg = false; - - if (!DirList) - return; // Nothing to do. - - StringRef Name(ArgName); - if (Name.equals("-I") || Name.equals("-L")) - CombinedArg = true; - - StringRef Dirs(DirList); - if (Dirs.empty()) // Empty string should not add '.'. - return; - - StringRef::size_type Delim; - while ((Delim = Dirs.find(llvm::sys::EnvPathSeparator)) != StringRef::npos) { - if (Delim == 0) { // Leading colon. - if (CombinedArg) { - CmdArgs.push_back(Args.MakeArgString(std::string(ArgName) + ".")); - } else { - CmdArgs.push_back(ArgName); - CmdArgs.push_back("."); - } - } else { - if (CombinedArg) { - CmdArgs.push_back( - Args.MakeArgString(std::string(ArgName) + Dirs.substr(0, Delim))); - } else { - CmdArgs.push_back(ArgName); - CmdArgs.push_back(Args.MakeArgString(Dirs.substr(0, Delim))); - } - } - Dirs = Dirs.substr(Delim + 1); - } - - if (Dirs.empty()) { // Trailing colon. - if (CombinedArg) { - CmdArgs.push_back(Args.MakeArgString(std::string(ArgName) + ".")); - } else { - CmdArgs.push_back(ArgName); - CmdArgs.push_back("."); - } - } else { // Add the last path. - if (CombinedArg) { - CmdArgs.push_back(Args.MakeArgString(std::string(ArgName) + Dirs)); - } else { - CmdArgs.push_back(ArgName); - CmdArgs.push_back(Args.MakeArgString(Dirs)); - } - } -} - -static void AddLinkerInputs(const ToolChain &TC, const InputInfoList &Inputs, - const ArgList &Args, ArgStringList &CmdArgs, - const JobAction &JA) { - const Driver &D = TC.getDriver(); - - // Add extra linker input arguments which are not treated as inputs - // (constructed via -Xarch_). - Args.AddAllArgValues(CmdArgs, options::OPT_Zlinker_input); - - for (const auto &II : Inputs) { - // If the current tool chain refers to an OpenMP offloading host, we should - // ignore inputs that refer to OpenMP offloading devices - they will be - // embedded according to a proper linker script. 
- if (auto *IA = II.getAction()) - if (JA.isHostOffloading(Action::OFK_OpenMP) && - IA->isDeviceOffloading(Action::OFK_OpenMP)) - continue; - - if (!TC.HasNativeLLVMSupport() && types::isLLVMIR(II.getType())) - // Don't try to pass LLVM inputs unless we have native support. - D.Diag(diag::err_drv_no_linker_llvm_support) << TC.getTripleString(); - - // Add filenames immediately. - if (II.isFilename()) { - CmdArgs.push_back(II.getFilename()); - continue; - } - - // Otherwise, this is a linker input argument. - const Arg &A = II.getInputArg(); - - // Handle reserved library options. - if (A.getOption().matches(options::OPT_Z_reserved_lib_stdcxx)) - TC.AddCXXStdlibLibArgs(Args, CmdArgs); - else if (A.getOption().matches(options::OPT_Z_reserved_lib_cckext)) - TC.AddCCKextLibArgs(Args, CmdArgs); - else if (A.getOption().matches(options::OPT_z)) { - // Pass -z prefix for gcc linker compatibility. - A.claim(); - A.render(Args, CmdArgs); - } else { - A.renderAsInput(Args, CmdArgs); - } - } - - // LIBRARY_PATH - included following the user specified library paths. - // and only supported on native toolchains. - if (!TC.isCrossCompiling()) - addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH"); -} - -/// Add OpenMP linker script arguments at the end of the argument list so that -/// the fat binary is built by embedding each of the device images into the -/// host. The linker script also defines a few symbols required by the code -/// generation so that the images can be easily retrieved at runtime by the -/// offloading library. This should be used only in tool chains that support -/// linker scripts. -static void AddOpenMPLinkerScript(const ToolChain &TC, Compilation &C, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, ArgStringList &CmdArgs, - const JobAction &JA) { - - // If this is not an OpenMP host toolchain, we don't need to do anything. - if (!JA.isHostOffloading(Action::OFK_OpenMP)) - return; - - // Create temporary linker script. Keep it if save-temps is enabled. - const char *LKS; - SmallString<256> Name = llvm::sys::path::filename(Output.getFilename()); - if (C.getDriver().isSaveTempsEnabled()) { - llvm::sys::path::replace_extension(Name, "lk"); - LKS = C.getArgs().MakeArgString(Name.c_str()); - } else { - llvm::sys::path::replace_extension(Name, ""); - Name = C.getDriver().GetTemporaryPath(Name, "lk"); - LKS = C.addTempFile(C.getArgs().MakeArgString(Name.c_str())); - } - - // Add linker script option to the command. - CmdArgs.push_back("-T"); - CmdArgs.push_back(LKS); - - // Create a buffer to write the contents of the linker script. - std::string LksBuffer; - llvm::raw_string_ostream LksStream(LksBuffer); - - // Get the OpenMP offload tool chains so that we can extract the triple - // associated with each device input. - auto OpenMPToolChains = C.getOffloadToolChains(); - assert(OpenMPToolChains.first != OpenMPToolChains.second && - "No OpenMP toolchains??"); - - // Track the input file name and device triple in order to build the script, - // inserting binaries in the designated sections. - SmallVector, 8> InputBinaryInfo; - - // Add commands to embed target binaries. We ensure that each section and - // image is 16-byte aligned. This is not mandatory, but increases the - // likelihood of data to be aligned with a cache block in several main host - // machines. 
- LksStream << "/*\n"; - LksStream << " OpenMP Offload Linker Script\n"; - LksStream << " *** Automatically generated by Clang ***\n"; - LksStream << "*/\n"; - LksStream << "TARGET(binary)\n"; - auto DTC = OpenMPToolChains.first; - for (auto &II : Inputs) { - const Action *A = II.getAction(); - // Is this a device linking action? - if (A && isa(A) && - A->isDeviceOffloading(Action::OFK_OpenMP)) { - assert(DTC != OpenMPToolChains.second && - "More device inputs than device toolchains??"); - InputBinaryInfo.push_back(std::make_pair( - DTC->second->getTriple().normalize(), II.getFilename())); - ++DTC; - LksStream << "INPUT(" << II.getFilename() << ")\n"; - } - } - - assert(DTC == OpenMPToolChains.second && - "Less device inputs than device toolchains??"); - - LksStream << "SECTIONS\n"; - LksStream << "{\n"; - LksStream << " .omp_offloading :\n"; - LksStream << " ALIGN(0x10)\n"; - LksStream << " {\n"; - - for (auto &BI : InputBinaryInfo) { - LksStream << " . = ALIGN(0x10);\n"; - LksStream << " PROVIDE_HIDDEN(.omp_offloading.img_start." << BI.first - << " = .);\n"; - LksStream << " " << BI.second << "\n"; - LksStream << " PROVIDE_HIDDEN(.omp_offloading.img_end." << BI.first - << " = .);\n"; - } - - LksStream << " }\n"; - // Add commands to define host entries begin and end. We use 1-byte subalign - // so that the linker does not add any padding and the elements in this - // section form an array. - LksStream << " .omp_offloading.entries :\n"; - LksStream << " ALIGN(0x10)\n"; - LksStream << " SUBALIGN(0x01)\n"; - LksStream << " {\n"; - LksStream << " PROVIDE_HIDDEN(.omp_offloading.entries_begin = .);\n"; - LksStream << " *(.omp_offloading.entries)\n"; - LksStream << " PROVIDE_HIDDEN(.omp_offloading.entries_end = .);\n"; - LksStream << " }\n"; - LksStream << "}\n"; - LksStream << "INSERT BEFORE .data\n"; - LksStream.flush(); - - // Dump the contents of the linker script if the user requested that. We - // support this option to enable testing of behavior with -###. - if (C.getArgs().hasArg(options::OPT_fopenmp_dump_offload_linker_script)) - llvm::errs() << LksBuffer; - - // If this is a dry run, do not create the linker script file. - if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH)) - return; - - // Open script file and write the contents. - std::error_code EC; - llvm::raw_fd_ostream Lksf(LKS, EC, llvm::sys::fs::F_None); - - if (EC) { - C.getDriver().Diag(clang::diag::err_unable_to_make_temp) << EC.message(); - return; - } - - Lksf << LksBuffer; -} - -/// \brief Determine whether Objective-C automated reference counting is -/// enabled. -static bool isObjCAutoRefCount(const ArgList &Args) { - return Args.hasFlag(options::OPT_fobjc_arc, options::OPT_fno_objc_arc, false); -} - -/// \brief Determine whether we are linking the ObjC runtime. -static bool isObjCRuntimeLinked(const ArgList &Args) { - if (isObjCAutoRefCount(Args)) { - Args.ClaimAllArgs(options::OPT_fobjc_link_runtime); - return true; - } - return Args.hasArg(options::OPT_fobjc_link_runtime); -} - -static bool forwardToGCC(const Option &O) { - // Don't forward inputs from the original command line. They are added from - // InputInfoList. - return O.getKind() != Option::InputClass && - !O.hasFlag(options::DriverOption) && !O.hasFlag(options::LinkerInput); -} - -/// Apply \a Work on the current tool chain \a RegularToolChain and any other -/// offloading tool chain that is associated with the current action \a JA. 
-static void -forAllAssociatedToolChains(Compilation &C, const JobAction &JA, - const ToolChain &RegularToolChain, - llvm::function_ref Work) { - // Apply Work on the current/regular tool chain. - Work(RegularToolChain); - - // Apply Work on all the offloading tool chains associated with the current - // action. - if (JA.isHostOffloading(Action::OFK_Cuda)) - Work(*C.getSingleOffloadToolChain()); - else if (JA.isDeviceOffloading(Action::OFK_Cuda)) - Work(*C.getSingleOffloadToolChain()); - - // - // TODO: Add support for other offloading programming models here. - // -} - -void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA, - const Driver &D, const ArgList &Args, - ArgStringList &CmdArgs, - const InputInfo &Output, - const InputInfoList &Inputs) const { - Arg *A; - const bool IsIAMCU = getToolChain().getTriple().isOSIAMCU(); - - CheckPreprocessingOptions(D, Args); - - Args.AddLastArg(CmdArgs, options::OPT_C); - Args.AddLastArg(CmdArgs, options::OPT_CC); - - // Handle dependency file generation. - if ((A = Args.getLastArg(options::OPT_M, options::OPT_MM)) || - (A = Args.getLastArg(options::OPT_MD)) || - (A = Args.getLastArg(options::OPT_MMD))) { - // Determine the output location. - const char *DepFile; - if (Arg *MF = Args.getLastArg(options::OPT_MF)) { - DepFile = MF->getValue(); - C.addFailureResultFile(DepFile, &JA); - } else if (Output.getType() == types::TY_Dependencies) { - DepFile = Output.getFilename(); - } else if (A->getOption().matches(options::OPT_M) || - A->getOption().matches(options::OPT_MM)) { - DepFile = "-"; - } else { - DepFile = getDependencyFileName(Args, Inputs); - C.addFailureResultFile(DepFile, &JA); - } - CmdArgs.push_back("-dependency-file"); - CmdArgs.push_back(DepFile); - - // Add a default target if one wasn't specified. - if (!Args.hasArg(options::OPT_MT) && !Args.hasArg(options::OPT_MQ)) { - const char *DepTarget; - - // If user provided -o, that is the dependency target, except - // when we are only generating a dependency file. - Arg *OutputOpt = Args.getLastArg(options::OPT_o); - if (OutputOpt && Output.getType() != types::TY_Dependencies) { - DepTarget = OutputOpt->getValue(); - } else { - // Otherwise derive from the base input. - // - // FIXME: This should use the computed output file location. 
- SmallString<128> P(Inputs[0].getBaseInput()); - llvm::sys::path::replace_extension(P, "o"); - DepTarget = Args.MakeArgString(llvm::sys::path::filename(P)); - } - - CmdArgs.push_back("-MT"); - SmallString<128> Quoted; - QuoteTarget(DepTarget, Quoted); - CmdArgs.push_back(Args.MakeArgString(Quoted)); - } - - if (A->getOption().matches(options::OPT_M) || - A->getOption().matches(options::OPT_MD)) - CmdArgs.push_back("-sys-header-deps"); - if ((isa(JA) && - !Args.hasArg(options::OPT_fno_module_file_deps)) || - Args.hasArg(options::OPT_fmodule_file_deps)) - CmdArgs.push_back("-module-file-deps"); - } - - if (Args.hasArg(options::OPT_MG)) { - if (!A || A->getOption().matches(options::OPT_MD) || - A->getOption().matches(options::OPT_MMD)) - D.Diag(diag::err_drv_mg_requires_m_or_mm); - CmdArgs.push_back("-MG"); - } - - Args.AddLastArg(CmdArgs, options::OPT_MP); - Args.AddLastArg(CmdArgs, options::OPT_MV); - - // Convert all -MQ args to -MT - for (const Arg *A : Args.filtered(options::OPT_MT, options::OPT_MQ)) { - A->claim(); - - if (A->getOption().matches(options::OPT_MQ)) { - CmdArgs.push_back("-MT"); - SmallString<128> Quoted; - QuoteTarget(A->getValue(), Quoted); - CmdArgs.push_back(Args.MakeArgString(Quoted)); - - // -MT flag - no change - } else { - A->render(Args, CmdArgs); - } - } - - // Add offload include arguments specific for CUDA. This must happen before - // we -I or -include anything else, because we must pick up the CUDA headers - // from the particular CUDA installation, rather than from e.g. - // /usr/local/include. - if (JA.isOffloading(Action::OFK_Cuda)) - getToolChain().AddCudaIncludeArgs(Args, CmdArgs); - - // Add -i* options, and automatically translate to - // -include-pch/-include-pth for transparent PCH support. It's - // wonky, but we include looking for .gch so we can support seamless - // replacement into a build system already set up to be generating - // .gch files. - int YcIndex = -1, YuIndex = -1; - { - int AI = -1; - const Arg *YcArg = Args.getLastArg(options::OPT__SLASH_Yc); - const Arg *YuArg = Args.getLastArg(options::OPT__SLASH_Yu); - for (const Arg *A : Args.filtered(options::OPT_clang_i_Group)) { - // Walk the whole i_Group and skip non "-include" flags so that the index - // here matches the index in the next loop below. - ++AI; - if (!A->getOption().matches(options::OPT_include)) - continue; - if (YcArg && strcmp(A->getValue(), YcArg->getValue()) == 0) - YcIndex = AI; - if (YuArg && strcmp(A->getValue(), YuArg->getValue()) == 0) - YuIndex = AI; - } - } - if (isa(JA) && YcIndex != -1) { - Driver::InputList Inputs; - D.BuildInputs(getToolChain(), C.getArgs(), Inputs); - assert(Inputs.size() == 1 && "Need one input when building pch"); - CmdArgs.push_back(Args.MakeArgString(Twine("-find-pch-source=") + - Inputs[0].second->getValue())); - } - - bool RenderedImplicitInclude = false; - int AI = -1; - for (const Arg *A : Args.filtered(options::OPT_clang_i_Group)) { - ++AI; - - if (getToolChain().getDriver().IsCLMode() && - A->getOption().matches(options::OPT_include)) { - // In clang-cl mode, /Ycfoo.h means that all code up to a foo.h - // include is compiled into foo.h, and everything after goes into - // the .obj file. /Yufoo.h means that all includes prior to and including - // foo.h are completely skipped and replaced with a use of the pch file - // for foo.h. (Each flag can have at most one value, multiple /Yc flags - // just mean that the last one wins.) If /Yc and /Yu are both present - // and refer to the same file, /Yc wins. 
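The dependency-target selection above reduces to the following (invocations invented):

  //   clang -MMD -c src/foo.c -o build/foo.o   =>  -MT build/foo.o
  //   clang -M src/foo.c                       =>  -MT foo.o
  //   (no -o, or dependencies-only output: the target is derived from the
  //    base input with its extension replaced by ".o", then Make-quoted)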
- // Note that OPT__SLASH_FI gets mapped to OPT_include. - // FIXME: The code here assumes that /Yc and /Yu refer to the same file. - // cl.exe seems to support both flags with different values, but that - // seems strange (which flag does /Fp now refer to?), so don't implement - // that until someone needs it. - int PchIndex = YcIndex != -1 ? YcIndex : YuIndex; - if (PchIndex != -1) { - if (isa(JA)) { - // When building the pch, skip all includes after the pch. - assert(YcIndex != -1 && PchIndex == YcIndex); - if (AI >= YcIndex) - continue; - } else { - // When using the pch, skip all includes prior to the pch. - if (AI < PchIndex) { - A->claim(); - continue; - } - if (AI == PchIndex) { - A->claim(); - CmdArgs.push_back("-include-pch"); - CmdArgs.push_back( - Args.MakeArgString(D.GetClPchPath(C, A->getValue()))); - continue; - } - } - } - } else if (A->getOption().matches(options::OPT_include)) { - // Handling of gcc-style gch precompiled headers. - bool IsFirstImplicitInclude = !RenderedImplicitInclude; - RenderedImplicitInclude = true; - - // Use PCH if the user requested it. - bool UsePCH = D.CCCUsePCH; - - bool FoundPTH = false; - bool FoundPCH = false; - SmallString<128> P(A->getValue()); - // We want the files to have a name like foo.h.pch. Add a dummy extension - // so that replace_extension does the right thing. - P += ".dummy"; - if (UsePCH) { - llvm::sys::path::replace_extension(P, "pch"); - if (llvm::sys::fs::exists(P)) - FoundPCH = true; - } - - if (!FoundPCH) { - llvm::sys::path::replace_extension(P, "pth"); - if (llvm::sys::fs::exists(P)) - FoundPTH = true; - } - - if (!FoundPCH && !FoundPTH) { - llvm::sys::path::replace_extension(P, "gch"); - if (llvm::sys::fs::exists(P)) { - FoundPCH = UsePCH; - FoundPTH = !UsePCH; - } - } - - if (FoundPCH || FoundPTH) { - if (IsFirstImplicitInclude) { - A->claim(); - if (UsePCH) - CmdArgs.push_back("-include-pch"); - else - CmdArgs.push_back("-include-pth"); - CmdArgs.push_back(Args.MakeArgString(P)); - continue; - } else { - // Ignore the PCH if not first on command line and emit warning. - D.Diag(diag::warn_drv_pch_not_first_include) << P - << A->getAsString(Args); - } - } - } else if (A->getOption().matches(options::OPT_isystem_after)) { - // Handling of paths which must come late. These entries are handled by - // the toolchain itself after the resource dir is inserted in the right - // search order. - // Do not claim the argument so that the use of the argument does not - // silently go unnoticed on toolchains which do not honour the option. - continue; - } - - // Not translated, render as usual. - A->claim(); - A->render(Args, CmdArgs); - } - - Args.AddAllArgs(CmdArgs, - {options::OPT_D, options::OPT_U, options::OPT_I_Group, - options::OPT_F, options::OPT_index_header_map}); - - // Add -Wp, and -Xpreprocessor if using the preprocessor. - - // FIXME: There is a very unfortunate problem here, some troubled - // souls abuse -Wp, to pass preprocessor options in gcc syntax. To - // really support that we would have to parse and then translate - // those options. :( - Args.AddAllArgValues(CmdArgs, options::OPT_Wp_COMMA, - options::OPT_Xpreprocessor); - - // -I- is a deprecated GCC feature, reject it. - if (Arg *A = Args.getLastArg(options::OPT_I_)) - D.Diag(diag::err_drv_I_dash_not_supported) << A->getAsString(Args); - - // If we have a --sysroot, and don't have an explicit -isysroot flag, add an - // -isysroot to the CC1 invocation. 
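Summarizing the gch probing order above (paths invented; assumes CCCUsePCH):

  //   -include foo.h
  //     foo.h.pch exists  ->  -include-pch foo.h.pch
  //     foo.h.pth exists  ->  -include-pth foo.h.pth
  //     foo.h.gch exists  ->  passed to -include-pch (or to -include-pth
  //                           when the driver is not using PCH)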
- StringRef sysroot = C.getSysRoot(); - if (sysroot != "") { - if (!Args.hasArg(options::OPT_isysroot)) { - CmdArgs.push_back("-isysroot"); - CmdArgs.push_back(C.getArgs().MakeArgString(sysroot)); - } - } - - // Parse additional include paths from environment variables. - // FIXME: We should probably sink the logic for handling these from the - // frontend into the driver. It will allow deleting 4 otherwise unused flags. - // CPATH - included following the user specified includes (but prior to - // builtin and standard includes). - addDirectoryList(Args, CmdArgs, "-I", "CPATH"); - // C_INCLUDE_PATH - system includes enabled when compiling C. - addDirectoryList(Args, CmdArgs, "-c-isystem", "C_INCLUDE_PATH"); - // CPLUS_INCLUDE_PATH - system includes enabled when compiling C++. - addDirectoryList(Args, CmdArgs, "-cxx-isystem", "CPLUS_INCLUDE_PATH"); - // OBJC_INCLUDE_PATH - system includes enabled when compiling ObjC. - addDirectoryList(Args, CmdArgs, "-objc-isystem", "OBJC_INCLUDE_PATH"); - // OBJCPLUS_INCLUDE_PATH - system includes enabled when compiling ObjC++. - addDirectoryList(Args, CmdArgs, "-objcxx-isystem", "OBJCPLUS_INCLUDE_PATH"); - - // While adding the include arguments, we also attempt to retrieve the - // arguments of related offloading toolchains or arguments that are specific - // of an offloading programming model. - - // Add C++ include arguments, if needed. - if (types::isCXX(Inputs[0].getType())) - forAllAssociatedToolChains(C, JA, getToolChain(), - [&Args, &CmdArgs](const ToolChain &TC) { - TC.AddClangCXXStdlibIncludeArgs(Args, CmdArgs); - }); - - // Add system include arguments for all targets but IAMCU. - if (!IsIAMCU) - forAllAssociatedToolChains(C, JA, getToolChain(), - [&Args, &CmdArgs](const ToolChain &TC) { - TC.AddClangSystemIncludeArgs(Args, CmdArgs); - }); - else { - // For IAMCU add special include arguments. - getToolChain().AddIAMCUIncludeArgs(Args, CmdArgs); - } -} - -// FIXME: Move to target hook. -static bool isSignedCharDefault(const llvm::Triple &Triple) { - switch (Triple.getArch()) { - default: - return true; - - case llvm::Triple::aarch64: - case llvm::Triple::aarch64_be: - case llvm::Triple::arm: - case llvm::Triple::armeb: - case llvm::Triple::thumb: - case llvm::Triple::thumbeb: - if (Triple.isOSDarwin() || Triple.isOSWindows()) - return true; - return false; - - case llvm::Triple::ppc: - case llvm::Triple::ppc64: - if (Triple.isOSDarwin()) - return true; - return false; - - case llvm::Triple::hexagon: - case llvm::Triple::ppc64le: - case llvm::Triple::systemz: - case llvm::Triple::xcore: - return false; - } -} - -static bool isNoCommonDefault(const llvm::Triple &Triple) { - switch (Triple.getArch()) { - default: - return false; - - case llvm::Triple::xcore: - case llvm::Triple::wasm32: - case llvm::Triple::wasm64: - return true; - } -} - -// ARM tools start. - -// Get SubArch (vN). -static int getARMSubArchVersionNumber(const llvm::Triple &Triple) { - llvm::StringRef Arch = Triple.getArchName(); - return llvm::ARM::parseArchVersion(Arch); -} - -// True if M-profile. -static bool isARMMProfile(const llvm::Triple &Triple) { - llvm::StringRef Arch = Triple.getArchName(); - unsigned Profile = llvm::ARM::parseArchProfile(Arch); - return Profile == llvm::ARM::PK_M; -} - -// Get Arch/CPU from args. 
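Concretely, addDirectoryList expands these environment variables like so (values invented):

  //   CPATH=/opt/a:/opt/b   =>  -I/opt/a -I/opt/b   (combined form for -I/-L)
  //   C_INCLUDE_PATH=/x     =>  -c-isystem /x       (separate form)
  //   CPATH=:/y             =>  -I. -I/y            (empty entry means ".")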
-static void getARMArchCPUFromArgs(const ArgList &Args, llvm::StringRef &Arch, - llvm::StringRef &CPU, bool FromAs = false) { - if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) - CPU = A->getValue(); - if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) - Arch = A->getValue(); - if (!FromAs) - return; - - for (const Arg *A : - Args.filtered(options::OPT_Wa_COMMA, options::OPT_Xassembler)) { - StringRef Value = A->getValue(); - if (Value.startswith("-mcpu=")) - CPU = Value.substr(6); - if (Value.startswith("-march=")) - Arch = Value.substr(7); - } -} - -// Handle -mhwdiv=. -// FIXME: Use ARMTargetParser. -static void getARMHWDivFeatures(const Driver &D, const Arg *A, - const ArgList &Args, StringRef HWDiv, - std::vector &Features) { - unsigned HWDivID = llvm::ARM::parseHWDiv(HWDiv); - if (!llvm::ARM::getHWDivFeatures(HWDivID, Features)) - D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args); -} - -// Handle -mfpu=. -static void getARMFPUFeatures(const Driver &D, const Arg *A, - const ArgList &Args, StringRef FPU, - std::vector &Features) { - unsigned FPUID = llvm::ARM::parseFPU(FPU); - if (!llvm::ARM::getFPUFeatures(FPUID, Features)) - D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args); -} - -// Decode ARM features from string like +[no]featureA+[no]featureB+... -static bool DecodeARMFeatures(const Driver &D, StringRef text, - std::vector &Features) { - SmallVector Split; - text.split(Split, StringRef("+"), -1, false); - - for (StringRef Feature : Split) { - StringRef FeatureName = llvm::ARM::getArchExtFeature(Feature); - if (!FeatureName.empty()) - Features.push_back(FeatureName); - else - return false; - } - return true; -} - -// Check if -march is valid by checking if it can be canonicalised and parsed. -// getARMArch is used here instead of just checking the -march value in order -// to handle -march=native correctly. -static void checkARMArchName(const Driver &D, const Arg *A, const ArgList &Args, - llvm::StringRef ArchName, - std::vector &Features, - const llvm::Triple &Triple) { - std::pair Split = ArchName.split("+"); - - std::string MArch = arm::getARMArch(ArchName, Triple); - if (llvm::ARM::parseArch(MArch) == llvm::ARM::AK_INVALID || - (Split.second.size() && !DecodeARMFeatures(D, Split.second, Features))) - D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args); -} - -// Check -mcpu=. Needs ArchName to handle -mcpu=generic. -static void checkARMCPUName(const Driver &D, const Arg *A, const ArgList &Args, - llvm::StringRef CPUName, llvm::StringRef ArchName, - std::vector &Features, - const llvm::Triple &Triple) { - std::pair Split = CPUName.split("+"); - - std::string CPU = arm::getARMTargetCPU(CPUName, ArchName, Triple); - if (arm::getLLVMArchSuffixForARM(CPU, ArchName, Triple).empty() || - (Split.second.size() && !DecodeARMFeatures(D, Split.second, Features))) - D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args); -} - -static bool useAAPCSForMachO(const llvm::Triple &T) { - // The backend is hardwired to assume AAPCS for M-class processors, ensure - // the frontend matches that. - return T.getEnvironment() == llvm::Triple::EABI || - T.getOS() == llvm::Triple::UnknownOS || isARMMProfile(T); -} - -// Select the float ABI as determined by -msoft-float, -mhard-float, and -// -mfloat-abi=. 
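For example, the '+'-suffix decoding above resolves (the flag value is invented):

  //   -march=armv8-a+crc+nocrypto
  //     MArch    = "armv8-a"
  //     Features = { "+crc", "-crypto" }   // via llvm::ARM::getArchExtFeature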
-arm::FloatABI arm::getARMFloatABI(const ToolChain &TC, const ArgList &Args) { - const Driver &D = TC.getDriver(); - const llvm::Triple &Triple = TC.getEffectiveTriple(); - auto SubArch = getARMSubArchVersionNumber(Triple); - arm::FloatABI ABI = FloatABI::Invalid; - if (Arg *A = - Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float, - options::OPT_mfloat_abi_EQ)) { - if (A->getOption().matches(options::OPT_msoft_float)) { - ABI = FloatABI::Soft; - } else if (A->getOption().matches(options::OPT_mhard_float)) { - ABI = FloatABI::Hard; - } else { - ABI = llvm::StringSwitch(A->getValue()) - .Case("soft", FloatABI::Soft) - .Case("softfp", FloatABI::SoftFP) - .Case("hard", FloatABI::Hard) - .Default(FloatABI::Invalid); - if (ABI == FloatABI::Invalid && !StringRef(A->getValue()).empty()) { - D.Diag(diag::err_drv_invalid_mfloat_abi) << A->getAsString(Args); - ABI = FloatABI::Soft; - } - } - - // It is incorrect to select hard float ABI on MachO platforms if the ABI is - // "apcs-gnu". - if (Triple.isOSBinFormatMachO() && !useAAPCSForMachO(Triple) && - ABI == FloatABI::Hard) { - D.Diag(diag::err_drv_unsupported_opt_for_target) << A->getAsString(Args) - << Triple.getArchName(); - } - } - - // If unspecified, choose the default based on the platform. - if (ABI == FloatABI::Invalid) { - switch (Triple.getOS()) { - case llvm::Triple::Darwin: - case llvm::Triple::MacOSX: - case llvm::Triple::IOS: - case llvm::Triple::TvOS: { - // Darwin defaults to "softfp" for v6 and v7. - ABI = (SubArch == 6 || SubArch == 7) ? FloatABI::SoftFP : FloatABI::Soft; - ABI = Triple.isWatchABI() ? FloatABI::Hard : ABI; - break; - } - case llvm::Triple::WatchOS: - ABI = FloatABI::Hard; - break; - - // FIXME: this is invalid for WindowsCE - case llvm::Triple::Win32: - ABI = FloatABI::Hard; - break; - - case llvm::Triple::FreeBSD: - switch (Triple.getEnvironment()) { - case llvm::Triple::GNUEABIHF: - ABI = FloatABI::Hard; - break; - default: - // FreeBSD defaults to soft float - ABI = FloatABI::Soft; - break; - } - break; - - default: - switch (Triple.getEnvironment()) { - case llvm::Triple::GNUEABIHF: - case llvm::Triple::MuslEABIHF: - case llvm::Triple::EABIHF: - ABI = FloatABI::Hard; - break; - case llvm::Triple::GNUEABI: - case llvm::Triple::MuslEABI: - case llvm::Triple::EABI: - // EABI is always AAPCS, and if it was not marked 'hard', it's softfp - ABI = FloatABI::SoftFP; - break; - case llvm::Triple::Android: - ABI = (SubArch == 7) ? FloatABI::SoftFP : FloatABI::Soft; - break; - default: - // Assume "soft", but warn the user we are guessing. 
- if (Triple.isOSBinFormatMachO() && - Triple.getSubArch() == llvm::Triple::ARMSubArch_v7em) - ABI = FloatABI::Hard; - else - ABI = FloatABI::Soft; - - if (Triple.getOS() != llvm::Triple::UnknownOS || - !Triple.isOSBinFormatMachO()) - D.Diag(diag::warn_drv_assuming_mfloat_abi_is) << "soft"; - break; - } - } - } - - assert(ABI != FloatABI::Invalid && "must select an ABI"); - return ABI; -} - -static void getARMTargetFeatures(const ToolChain &TC, - const llvm::Triple &Triple, - const ArgList &Args, - ArgStringList &CmdArgs, - std::vector &Features, - bool ForAS) { - const Driver &D = TC.getDriver(); - - bool KernelOrKext = - Args.hasArg(options::OPT_mkernel, options::OPT_fapple_kext); - arm::FloatABI ABI = arm::getARMFloatABI(TC, Args); - const Arg *WaCPU = nullptr, *WaFPU = nullptr; - const Arg *WaHDiv = nullptr, *WaArch = nullptr; - - if (!ForAS) { - // FIXME: Note, this is a hack, the LLVM backend doesn't actually use these - // yet (it uses the -mfloat-abi and -msoft-float options), and it is - // stripped out by the ARM target. We should probably pass this a new - // -target-option, which is handled by the -cc1/-cc1as invocation. - // - // FIXME2: For consistency, it would be ideal if we set up the target - // machine state the same when using the frontend or the assembler. We don't - // currently do that for the assembler, we pass the options directly to the - // backend and never even instantiate the frontend TargetInfo. If we did, - // and used its handleTargetFeatures hook, then we could ensure the - // assembler and the frontend behave the same. - - // Use software floating point operations? - if (ABI == arm::FloatABI::Soft) - Features.push_back("+soft-float"); - - // Use software floating point argument passing? - if (ABI != arm::FloatABI::Hard) - Features.push_back("+soft-float-abi"); - } else { - // Here, we make sure that -Wa,-mfpu/cpu/arch/hwdiv will be passed down - // to the assembler correctly. - for (const Arg *A : - Args.filtered(options::OPT_Wa_COMMA, options::OPT_Xassembler)) { - StringRef Value = A->getValue(); - if (Value.startswith("-mfpu=")) { - WaFPU = A; - } else if (Value.startswith("-mcpu=")) { - WaCPU = A; - } else if (Value.startswith("-mhwdiv=")) { - WaHDiv = A; - } else if (Value.startswith("-march=")) { - WaArch = A; - } - } - } - - // Check -march. ClangAs gives preference to -Wa,-march=. - const Arg *ArchArg = Args.getLastArg(options::OPT_march_EQ); - StringRef ArchName; - if (WaArch) { - if (ArchArg) - D.Diag(clang::diag::warn_drv_unused_argument) - << ArchArg->getAsString(Args); - ArchName = StringRef(WaArch->getValue()).substr(7); - checkARMArchName(D, WaArch, Args, ArchName, Features, Triple); - // FIXME: Set Arch. - D.Diag(clang::diag::warn_drv_unused_argument) << WaArch->getAsString(Args); - } else if (ArchArg) { - ArchName = ArchArg->getValue(); - checkARMArchName(D, ArchArg, Args, ArchName, Features, Triple); - } - - // Check -mcpu. ClangAs gives preference to -Wa,-mcpu=. 
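The chosen float ABI then feeds the target-feature list as follows (a summary of the code above):

  //   soft    =>  +soft-float +soft-float-abi  (plus -neon -crypto below)
  //   softfp  =>  +soft-float-abi              (hard FP ops, soft passing)
  //   hard    =>  (neither feature)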
- const Arg *CPUArg = Args.getLastArg(options::OPT_mcpu_EQ); - StringRef CPUName; - if (WaCPU) { - if (CPUArg) - D.Diag(clang::diag::warn_drv_unused_argument) - << CPUArg->getAsString(Args); - CPUName = StringRef(WaCPU->getValue()).substr(6); - checkARMCPUName(D, WaCPU, Args, CPUName, ArchName, Features, Triple); - } else if (CPUArg) { - CPUName = CPUArg->getValue(); - checkARMCPUName(D, CPUArg, Args, CPUName, ArchName, Features, Triple); - } - - // Add CPU features for generic CPUs - if (CPUName == "native") { - llvm::StringMap HostFeatures; - if (llvm::sys::getHostCPUFeatures(HostFeatures)) - for (auto &F : HostFeatures) - Features.push_back( - Args.MakeArgString((F.second ? "+" : "-") + F.first())); - } - - // Honor -mfpu=. ClangAs gives preference to -Wa,-mfpu=. - const Arg *FPUArg = Args.getLastArg(options::OPT_mfpu_EQ); - if (WaFPU) { - if (FPUArg) - D.Diag(clang::diag::warn_drv_unused_argument) - << FPUArg->getAsString(Args); - getARMFPUFeatures(D, WaFPU, Args, StringRef(WaFPU->getValue()).substr(6), - Features); - } else if (FPUArg) { - getARMFPUFeatures(D, FPUArg, Args, FPUArg->getValue(), Features); - } - - // Honor -mhwdiv=. ClangAs gives preference to -Wa,-mhwdiv=. - const Arg *HDivArg = Args.getLastArg(options::OPT_mhwdiv_EQ); - if (WaHDiv) { - if (HDivArg) - D.Diag(clang::diag::warn_drv_unused_argument) - << HDivArg->getAsString(Args); - getARMHWDivFeatures(D, WaHDiv, Args, - StringRef(WaHDiv->getValue()).substr(8), Features); - } else if (HDivArg) - getARMHWDivFeatures(D, HDivArg, Args, HDivArg->getValue(), Features); - - // Setting -msoft-float effectively disables NEON because of the GCC - // implementation, although the same isn't true of VFP or VFP3. - if (ABI == arm::FloatABI::Soft) { - Features.push_back("-neon"); - // Also need to explicitly disable features which imply NEON. - Features.push_back("-crypto"); - } - - // En/disable crc code generation. - if (Arg *A = Args.getLastArg(options::OPT_mcrc, options::OPT_mnocrc)) { - if (A->getOption().matches(options::OPT_mcrc)) - Features.push_back("+crc"); - else - Features.push_back("-crc"); - } - - // Look for the last occurrence of -mlong-calls or -mno-long-calls. If - // neither options are specified, see if we are compiling for kernel/kext and - // decide whether to pass "+long-calls" based on the OS and its version. - if (Arg *A = Args.getLastArg(options::OPT_mlong_calls, - options::OPT_mno_long_calls)) { - if (A->getOption().matches(options::OPT_mlong_calls)) - Features.push_back("+long-calls"); - } else if (KernelOrKext && (!Triple.isiOS() || Triple.isOSVersionLT(6)) && - !Triple.isWatchOS()) { - Features.push_back("+long-calls"); - } - - // Generate execute-only output (no data access to code sections). - // Supported only on ARMv6T2 and ARMv7 and above. - // Cannot be combined with -mno-movt or -mlong-calls - if (Arg *A = Args.getLastArg(options::OPT_mexecute_only, options::OPT_mno_execute_only)) { - if (A->getOption().matches(options::OPT_mexecute_only)) { - if (getARMSubArchVersionNumber(Triple) < 7 && - llvm::ARM::parseArch(Triple.getArchName()) != llvm::ARM::AK_ARMV6T2) - D.Diag(diag::err_target_unsupported_execute_only) << Triple.getArchName(); - else if (Arg *B = Args.getLastArg(options::OPT_mno_movt)) - D.Diag(diag::err_opt_not_valid_with_opt) << A->getAsString(Args) << B->getAsString(Args); - // Long calls create constant pool entries and have not yet been fixed up - // to play nicely with execute-only. 
Hence, they cannot be used in - // execute-only code for now - else if (Arg *B = Args.getLastArg(options::OPT_mlong_calls, options::OPT_mno_long_calls)) { - if (B->getOption().matches(options::OPT_mlong_calls)) - D.Diag(diag::err_opt_not_valid_with_opt) << A->getAsString(Args) << B->getAsString(Args); - } - - CmdArgs.push_back("-arm-execute-only"); - } - } - - // Kernel code has more strict alignment requirements. - if (KernelOrKext) - Features.push_back("+strict-align"); - else if (Arg *A = Args.getLastArg(options::OPT_mno_unaligned_access, - options::OPT_munaligned_access)) { - if (A->getOption().matches(options::OPT_munaligned_access)) { - // No v6M core supports unaligned memory access (v6M ARM ARM A3.2). - if (Triple.getSubArch() == llvm::Triple::SubArchType::ARMSubArch_v6m) - D.Diag(diag::err_target_unsupported_unaligned) << "v6m"; - // v8M Baseline follows on from v6M, so doesn't support unaligned memory - // access either. - else if (Triple.getSubArch() == llvm::Triple::SubArchType::ARMSubArch_v8m_baseline) - D.Diag(diag::err_target_unsupported_unaligned) << "v8m.base"; - } else - Features.push_back("+strict-align"); - } else { - // Assume pre-ARMv6 doesn't support unaligned accesses. - // - // ARMv6 may or may not support unaligned accesses depending on the - // SCTLR.U bit, which is architecture-specific. We assume ARMv6 - // Darwin and NetBSD targets support unaligned accesses, and others don't. - // - // ARMv7 always has SCTLR.U set to 1, but it has a new SCTLR.A bit - // which raises an alignment fault on unaligned accesses. Linux - // defaults this bit to 0 and handles it as a system-wide (not - // per-process) setting. It is therefore safe to assume that ARMv7+ - // Linux targets support unaligned accesses. The same goes for NaCl. - // - // The above behavior is consistent with GCC. - int VersionNum = getARMSubArchVersionNumber(Triple); - if (Triple.isOSDarwin() || Triple.isOSNetBSD()) { - if (VersionNum < 6 || - Triple.getSubArch() == llvm::Triple::SubArchType::ARMSubArch_v6m) - Features.push_back("+strict-align"); - } else if (Triple.isOSLinux() || Triple.isOSNaCl()) { - if (VersionNum < 7) - Features.push_back("+strict-align"); - } else - Features.push_back("+strict-align"); - } - - // llvm does not support reserving registers in general. There is support - // for reserving r9 on ARM though (defined as a platform-specific register - // in ARM EABI). - if (Args.hasArg(options::OPT_ffixed_r9)) - Features.push_back("+reserve-r9"); - - // The kext linker doesn't know how to deal with movw/movt. - if (KernelOrKext || Args.hasArg(options::OPT_mno_movt)) - Features.push_back("+no-movt"); -} - -void Clang::AddARMTargetArgs(const llvm::Triple &Triple, const ArgList &Args, - ArgStringList &CmdArgs, bool KernelOrKext) const { - // Select the ABI to use. - // FIXME: Support -meabi. - // FIXME: Parts of this are duplicated in the backend, unify this somehow. - const char *ABIName = nullptr; - if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) { - ABIName = A->getValue(); - } else if (Triple.isOSBinFormatMachO()) { - if (useAAPCSForMachO(Triple)) { - ABIName = "aapcs"; - } else if (Triple.isWatchABI()) { - ABIName = "aapcs16"; - } else { - ABIName = "apcs-gnu"; - } - } else if (Triple.isOSWindows()) { - // FIXME: this is invalid for WindowsCE - ABIName = "aapcs"; - } else { - // Select the default based on the platform. 
- switch (Triple.getEnvironment()) { - case llvm::Triple::Android: - case llvm::Triple::GNUEABI: - case llvm::Triple::GNUEABIHF: - case llvm::Triple::MuslEABI: - case llvm::Triple::MuslEABIHF: - ABIName = "aapcs-linux"; - break; - case llvm::Triple::EABIHF: - case llvm::Triple::EABI: - ABIName = "aapcs"; - break; - default: - if (Triple.getOS() == llvm::Triple::NetBSD) - ABIName = "apcs-gnu"; - else - ABIName = "aapcs"; - break; - } - } - CmdArgs.push_back("-target-abi"); - CmdArgs.push_back(ABIName); - - // Determine floating point ABI from the options & target defaults. - arm::FloatABI ABI = arm::getARMFloatABI(getToolChain(), Args); - if (ABI == arm::FloatABI::Soft) { - // Floating point operations and argument passing are soft. - // FIXME: This changes CPP defines, we need -target-soft-float. - CmdArgs.push_back("-msoft-float"); - CmdArgs.push_back("-mfloat-abi"); - CmdArgs.push_back("soft"); - } else if (ABI == arm::FloatABI::SoftFP) { - // Floating point operations are hard, but argument passing is soft. - CmdArgs.push_back("-mfloat-abi"); - CmdArgs.push_back("soft"); - } else { - // Floating point operations and argument passing are hard. - assert(ABI == arm::FloatABI::Hard && "Invalid float abi!"); - CmdArgs.push_back("-mfloat-abi"); - CmdArgs.push_back("hard"); - } - - // Forward the -mglobal-merge option for explicit control over the pass. - if (Arg *A = Args.getLastArg(options::OPT_mglobal_merge, - options::OPT_mno_global_merge)) { - CmdArgs.push_back("-backend-option"); - if (A->getOption().matches(options::OPT_mno_global_merge)) - CmdArgs.push_back("-arm-global-merge=false"); - else - CmdArgs.push_back("-arm-global-merge=true"); - } - - if (!Args.hasFlag(options::OPT_mimplicit_float, - options::OPT_mno_implicit_float, true)) - CmdArgs.push_back("-no-implicit-float"); -} -// ARM tools end. - -/// getAArch64TargetCPU - Get the (LLVM) name of the AArch64 cpu we are -/// targeting. Set \p A to the Arg corresponding to the -mcpu or -mtune -/// arguments if they are provided, or to nullptr otherwise. -static std::string getAArch64TargetCPU(const ArgList &Args, Arg *&A) { - std::string CPU; - // If we have -mtune or -mcpu, use that. - if ((A = Args.getLastArg(options::OPT_mtune_EQ))) { - CPU = StringRef(A->getValue()).lower(); - } else if ((A = Args.getLastArg(options::OPT_mcpu_EQ))) { - StringRef Mcpu = A->getValue(); - CPU = Mcpu.split("+").first.lower(); - } - - // Handle CPU name is 'native'. - if (CPU == "native") - return llvm::sys::getHostCPUName(); - else if (CPU.size()) - return CPU; - - // Make sure we pick "cyclone" if -arch is used. - // FIXME: Should this be picked by checking the target triple instead? 
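In cc1 terms, AddARMTargetArgs above emits (illustrative):

  //   -mfloat-abi=soft    =>  -msoft-float -mfloat-abi soft
  //   -mfloat-abi=softfp  =>  -mfloat-abi soft
  //   -mfloat-abi=hard    =>  -mfloat-abi hard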
- if (Args.getLastArg(options::OPT_arch)) - return "cyclone"; - - return "generic"; -} - -void Clang::AddAArch64TargetArgs(const ArgList &Args, - ArgStringList &CmdArgs) const { - const llvm::Triple &Triple = getToolChain().getEffectiveTriple(); - - if (!Args.hasFlag(options::OPT_mred_zone, options::OPT_mno_red_zone, true) || - Args.hasArg(options::OPT_mkernel) || - Args.hasArg(options::OPT_fapple_kext)) - CmdArgs.push_back("-disable-red-zone"); - - if (!Args.hasFlag(options::OPT_mimplicit_float, - options::OPT_mno_implicit_float, true)) - CmdArgs.push_back("-no-implicit-float"); - - const char *ABIName = nullptr; - if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) - ABIName = A->getValue(); - else if (Triple.isOSDarwin()) - ABIName = "darwinpcs"; - else - ABIName = "aapcs"; - - CmdArgs.push_back("-target-abi"); - CmdArgs.push_back(ABIName); - - if (Arg *A = Args.getLastArg(options::OPT_mfix_cortex_a53_835769, - options::OPT_mno_fix_cortex_a53_835769)) { - CmdArgs.push_back("-backend-option"); - if (A->getOption().matches(options::OPT_mfix_cortex_a53_835769)) - CmdArgs.push_back("-aarch64-fix-cortex-a53-835769=1"); - else - CmdArgs.push_back("-aarch64-fix-cortex-a53-835769=0"); - } else if (Triple.isAndroid()) { - // Enabled A53 errata (835769) workaround by default on android - CmdArgs.push_back("-backend-option"); - CmdArgs.push_back("-aarch64-fix-cortex-a53-835769=1"); - } - - // Forward the -mglobal-merge option for explicit control over the pass. - if (Arg *A = Args.getLastArg(options::OPT_mglobal_merge, - options::OPT_mno_global_merge)) { - CmdArgs.push_back("-backend-option"); - if (A->getOption().matches(options::OPT_mno_global_merge)) - CmdArgs.push_back("-aarch64-global-merge=false"); - else - CmdArgs.push_back("-aarch64-global-merge=true"); - } -} - -// Get CPU and ABI names. They are not independent -// so we have to calculate them together. -void mips::getMipsCPUAndABI(const ArgList &Args, const llvm::Triple &Triple, - StringRef &CPUName, StringRef &ABIName) { - const char *DefMips32CPU = "mips32r2"; - const char *DefMips64CPU = "mips64r2"; - - // MIPS32r6 is the default for mips(el)?-img-linux-gnu and MIPS64r6 is the - // default for mips64(el)?-img-linux-gnu. - if (Triple.getVendor() == llvm::Triple::ImaginationTechnologies && - Triple.getEnvironment() == llvm::Triple::GNU) { - DefMips32CPU = "mips32r6"; - DefMips64CPU = "mips64r6"; - } - - // MIPS64r6 is the default for Android MIPS64 (mips64el-linux-android). - if (Triple.isAndroid()) { - DefMips32CPU = "mips32"; - DefMips64CPU = "mips64r6"; - } - - // MIPS3 is the default for mips64*-unknown-openbsd. - if (Triple.getOS() == llvm::Triple::OpenBSD) - DefMips64CPU = "mips3"; - - if (Arg *A = Args.getLastArg(options::OPT_march_EQ, options::OPT_mcpu_EQ)) - CPUName = A->getValue(); - - if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) { - ABIName = A->getValue(); - // Convert a GNU style Mips ABI name to the name - // accepted by LLVM Mips backend. - ABIName = llvm::StringSwitch(ABIName) - .Case("32", "o32") - .Case("64", "n64") - .Default(ABIName); - } - - // Setup default CPU and ABI names. 
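getAArch64TargetCPU above resolves, for example (flags illustrative):

  //   -mcpu=cortex-a57+crc  =>  "cortex-a57"  (extensions handled elsewhere)
  //   -mcpu=native          =>  llvm::sys::getHostCPUName()
  //   (none, but -arch)     =>  "cyclone"
  //   (none)                =>  "generic"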
- if (CPUName.empty() && ABIName.empty()) { - switch (Triple.getArch()) { - default: - llvm_unreachable("Unexpected triple arch name"); - case llvm::Triple::mips: - case llvm::Triple::mipsel: - CPUName = DefMips32CPU; - break; - case llvm::Triple::mips64: - case llvm::Triple::mips64el: - CPUName = DefMips64CPU; - break; - } - } - - if (ABIName.empty() && - (Triple.getVendor() == llvm::Triple::MipsTechnologies || - Triple.getVendor() == llvm::Triple::ImaginationTechnologies)) { - ABIName = llvm::StringSwitch(CPUName) - .Case("mips1", "o32") - .Case("mips2", "o32") - .Case("mips3", "n64") - .Case("mips4", "n64") - .Case("mips5", "n64") - .Case("mips32", "o32") - .Case("mips32r2", "o32") - .Case("mips32r3", "o32") - .Case("mips32r5", "o32") - .Case("mips32r6", "o32") - .Case("mips64", "n64") - .Case("mips64r2", "n64") - .Case("mips64r3", "n64") - .Case("mips64r5", "n64") - .Case("mips64r6", "n64") - .Case("octeon", "n64") - .Case("p5600", "o32") - .Default(""); - } - - if (ABIName.empty()) { - // Deduce ABI name from the target triple. - if (Triple.getArch() == llvm::Triple::mips || - Triple.getArch() == llvm::Triple::mipsel) - ABIName = "o32"; - else - ABIName = "n64"; - } - - if (CPUName.empty()) { - // Deduce CPU name from ABI name. - CPUName = llvm::StringSwitch(ABIName) - .Case("o32", DefMips32CPU) - .Cases("n32", "n64", DefMips64CPU) - .Default(""); - } - - // FIXME: Warn on inconsistent use of -march and -mabi. -} - -std::string mips::getMipsABILibSuffix(const ArgList &Args, - const llvm::Triple &Triple) { - StringRef CPUName, ABIName; - tools::mips::getMipsCPUAndABI(Args, Triple, CPUName, ABIName); - return llvm::StringSwitch(ABIName) - .Case("o32", "") - .Case("n32", "32") - .Case("n64", "64"); -} - -// Convert ABI name to the GNU tools acceptable variant. -static StringRef getGnuCompatibleMipsABIName(StringRef ABI) { - return llvm::StringSwitch(ABI) - .Case("o32", "32") - .Case("n64", "64") - .Default(ABI); -} - -// Select the MIPS float ABI as determined by -msoft-float, -mhard-float, -// and -mfloat-abi=. -static mips::FloatABI getMipsFloatABI(const Driver &D, const ArgList &Args) { - mips::FloatABI ABI = mips::FloatABI::Invalid; - if (Arg *A = - Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float, - options::OPT_mfloat_abi_EQ)) { - if (A->getOption().matches(options::OPT_msoft_float)) - ABI = mips::FloatABI::Soft; - else if (A->getOption().matches(options::OPT_mhard_float)) - ABI = mips::FloatABI::Hard; - else { - ABI = llvm::StringSwitch(A->getValue()) - .Case("soft", mips::FloatABI::Soft) - .Case("hard", mips::FloatABI::Hard) - .Default(mips::FloatABI::Invalid); - if (ABI == mips::FloatABI::Invalid && !StringRef(A->getValue()).empty()) { - D.Diag(diag::err_drv_invalid_mfloat_abi) << A->getAsString(Args); - ABI = mips::FloatABI::Hard; - } - } - } - - // If unspecified, choose the default based on the platform. - if (ABI == mips::FloatABI::Invalid) { - // Assume "hard", because it's a default value used by gcc. - // When we start to recognize specific target MIPS processors, - // we will be able to select the default more correctly. 
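Worked examples of the MIPS CPU/ABI deduction above (triples illustrative):

  //   mips-unknown-linux-gnu       =>  (mips32r2, o32)
  //   mips64el-unknown-linux-gnu   =>  (mips64r2, n64)
  //   mips64el-linux-android       =>  (mips64r6, n64)
  //   mipsel-img-linux-gnu         =>  (mips32r6, o32)
  //   -mabi=64 (GNU spelling)      =>  ABI "n64"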
- ABI = mips::FloatABI::Hard; - } - - assert(ABI != mips::FloatABI::Invalid && "must select an ABI"); - return ABI; -} - -static void AddTargetFeature(const ArgList &Args, - std::vector &Features, - OptSpecifier OnOpt, OptSpecifier OffOpt, - StringRef FeatureName) { - if (Arg *A = Args.getLastArg(OnOpt, OffOpt)) { - if (A->getOption().matches(OnOpt)) - Features.push_back(Args.MakeArgString("+" + FeatureName)); - else - Features.push_back(Args.MakeArgString("-" + FeatureName)); - } -} - -static void getMIPSTargetFeatures(const Driver &D, const llvm::Triple &Triple, - const ArgList &Args, - std::vector &Features) { - StringRef CPUName; - StringRef ABIName; - mips::getMipsCPUAndABI(Args, Triple, CPUName, ABIName); - ABIName = getGnuCompatibleMipsABIName(ABIName); - - AddTargetFeature(Args, Features, options::OPT_mno_abicalls, - options::OPT_mabicalls, "noabicalls"); - - mips::FloatABI FloatABI = getMipsFloatABI(D, Args); - if (FloatABI == mips::FloatABI::Soft) { - // FIXME: Note, this is a hack. We need to pass the selected float - // mode to the MipsTargetInfoBase to define appropriate macros there. - // Now it is the only method. - Features.push_back("+soft-float"); - } - - if (Arg *A = Args.getLastArg(options::OPT_mnan_EQ)) { - StringRef Val = StringRef(A->getValue()); - if (Val == "2008") { - if (mips::getSupportedNanEncoding(CPUName) & mips::Nan2008) - Features.push_back("+nan2008"); - else { - Features.push_back("-nan2008"); - D.Diag(diag::warn_target_unsupported_nan2008) << CPUName; - } - } else if (Val == "legacy") { - if (mips::getSupportedNanEncoding(CPUName) & mips::NanLegacy) - Features.push_back("-nan2008"); - else { - Features.push_back("+nan2008"); - D.Diag(diag::warn_target_unsupported_nanlegacy) << CPUName; - } - } else - D.Diag(diag::err_drv_unsupported_option_argument) - << A->getOption().getName() << Val; - } - - AddTargetFeature(Args, Features, options::OPT_msingle_float, - options::OPT_mdouble_float, "single-float"); - AddTargetFeature(Args, Features, options::OPT_mips16, options::OPT_mno_mips16, - "mips16"); - AddTargetFeature(Args, Features, options::OPT_mmicromips, - options::OPT_mno_micromips, "micromips"); - AddTargetFeature(Args, Features, options::OPT_mdsp, options::OPT_mno_dsp, - "dsp"); - AddTargetFeature(Args, Features, options::OPT_mdspr2, options::OPT_mno_dspr2, - "dspr2"); - AddTargetFeature(Args, Features, options::OPT_mmsa, options::OPT_mno_msa, - "msa"); - - // Add the last -mfp32/-mfpxx/-mfp64, if none are given and the ABI is O32 - // pass -mfpxx, or if none are given and fp64a is default, pass fp64 and - // nooddspreg. 
- if (Arg *A = Args.getLastArg(options::OPT_mfp32, options::OPT_mfpxx, - options::OPT_mfp64)) { - if (A->getOption().matches(options::OPT_mfp32)) - Features.push_back(Args.MakeArgString("-fp64")); - else if (A->getOption().matches(options::OPT_mfpxx)) { - Features.push_back(Args.MakeArgString("+fpxx")); - Features.push_back(Args.MakeArgString("+nooddspreg")); - } else - Features.push_back(Args.MakeArgString("+fp64")); - } else if (mips::shouldUseFPXX(Args, Triple, CPUName, ABIName, FloatABI)) { - Features.push_back(Args.MakeArgString("+fpxx")); - Features.push_back(Args.MakeArgString("+nooddspreg")); - } else if (mips::isFP64ADefault(Triple, CPUName)) { - Features.push_back(Args.MakeArgString("+fp64")); - Features.push_back(Args.MakeArgString("+nooddspreg")); - } - - AddTargetFeature(Args, Features, options::OPT_mno_odd_spreg, - options::OPT_modd_spreg, "nooddspreg"); -} - -void Clang::AddMIPSTargetArgs(const ArgList &Args, - ArgStringList &CmdArgs) const { - const Driver &D = getToolChain().getDriver(); - StringRef CPUName; - StringRef ABIName; - const llvm::Triple &Triple = getToolChain().getTriple(); - mips::getMipsCPUAndABI(Args, Triple, CPUName, ABIName); - - CmdArgs.push_back("-target-abi"); - CmdArgs.push_back(ABIName.data()); - - mips::FloatABI ABI = getMipsFloatABI(D, Args); - if (ABI == mips::FloatABI::Soft) { - // Floating point operations and argument passing are soft. - CmdArgs.push_back("-msoft-float"); - CmdArgs.push_back("-mfloat-abi"); - CmdArgs.push_back("soft"); - } else { - // Floating point operations and argument passing are hard. - assert(ABI == mips::FloatABI::Hard && "Invalid float abi!"); - CmdArgs.push_back("-mfloat-abi"); - CmdArgs.push_back("hard"); - } - - if (Arg *A = Args.getLastArg(options::OPT_mxgot, options::OPT_mno_xgot)) { - if (A->getOption().matches(options::OPT_mxgot)) { - CmdArgs.push_back("-mllvm"); - CmdArgs.push_back("-mxgot"); - } - } - - if (Arg *A = Args.getLastArg(options::OPT_mldc1_sdc1, - options::OPT_mno_ldc1_sdc1)) { - if (A->getOption().matches(options::OPT_mno_ldc1_sdc1)) { - CmdArgs.push_back("-mllvm"); - CmdArgs.push_back("-mno-ldc1-sdc1"); - } - } - - if (Arg *A = Args.getLastArg(options::OPT_mcheck_zero_division, - options::OPT_mno_check_zero_division)) { - if (A->getOption().matches(options::OPT_mno_check_zero_division)) { - CmdArgs.push_back("-mllvm"); - CmdArgs.push_back("-mno-check-zero-division"); - } - } - - if (Arg *A = Args.getLastArg(options::OPT_G)) { - StringRef v = A->getValue(); - CmdArgs.push_back("-mllvm"); - CmdArgs.push_back(Args.MakeArgString("-mips-ssection-threshold=" + v)); - A->claim(); - } - - if (Arg *A = Args.getLastArg(options::OPT_mcompact_branches_EQ)) { - StringRef Val = StringRef(A->getValue()); - if (mips::hasCompactBranches(CPUName)) { - if (Val == "never" || Val == "always" || Val == "optimal") { - CmdArgs.push_back("-mllvm"); - CmdArgs.push_back(Args.MakeArgString("-mips-compact-branches=" + Val)); - } else - D.Diag(diag::err_drv_unsupported_option_argument) - << A->getOption().getName() << Val; - } else - D.Diag(diag::warn_target_unsupported_compact_branches) << CPUName; - } -} - -/// getPPCTargetCPU - Get the (LLVM) name of the PowerPC cpu we are targeting. 
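The FP-mode handling above maps to backend features as:

  //   -mfp32                            =>  -fp64
  //   -mfpxx                            =>  +fpxx +nooddspreg
  //   -mfp64                            =>  +fp64
  //   (none, FPXX usable on O32)        =>  +fpxx +nooddspreg
  //   (none, fp64a is the CPU default)  =>  +fp64 +nooddspreg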
-static std::string getPPCTargetCPU(const ArgList &Args) {
-  if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
-    StringRef CPUName = A->getValue();
-
-    if (CPUName == "native") {
-      std::string CPU = llvm::sys::getHostCPUName();
-      if (!CPU.empty() && CPU != "generic")
-        return CPU;
-      else
-        return "";
-    }
-
-    return llvm::StringSwitch<const char *>(CPUName)
-        .Case("common", "generic")
-        .Case("440", "440")
-        .Case("440fp", "440")
-        .Case("450", "450")
-        .Case("601", "601")
-        .Case("602", "602")
-        .Case("603", "603")
-        .Case("603e", "603e")
-        .Case("603ev", "603ev")
-        .Case("604", "604")
-        .Case("604e", "604e")
-        .Case("620", "620")
-        .Case("630", "pwr3")
-        .Case("G3", "g3")
-        .Case("7400", "7400")
-        .Case("G4", "g4")
-        .Case("7450", "7450")
-        .Case("G4+", "g4+")
-        .Case("750", "750")
-        .Case("970", "970")
-        .Case("G5", "g5")
-        .Case("a2", "a2")
-        .Case("a2q", "a2q")
-        .Case("e500mc", "e500mc")
-        .Case("e5500", "e5500")
-        .Case("power3", "pwr3")
-        .Case("power4", "pwr4")
-        .Case("power5", "pwr5")
-        .Case("power5x", "pwr5x")
-        .Case("power6", "pwr6")
-        .Case("power6x", "pwr6x")
-        .Case("power7", "pwr7")
-        .Case("power8", "pwr8")
-        .Case("power9", "pwr9")
-        .Case("pwr3", "pwr3")
-        .Case("pwr4", "pwr4")
-        .Case("pwr5", "pwr5")
-        .Case("pwr5x", "pwr5x")
-        .Case("pwr6", "pwr6")
-        .Case("pwr6x", "pwr6x")
-        .Case("pwr7", "pwr7")
-        .Case("pwr8", "pwr8")
-        .Case("pwr9", "pwr9")
-        .Case("powerpc", "ppc")
-        .Case("powerpc64", "ppc64")
-        .Case("powerpc64le", "ppc64le")
-        .Default("");
-  }
-
-  return "";
-}
-
-static void getPPCTargetFeatures(const Driver &D, const llvm::Triple &Triple,
-                                 const ArgList &Args,
-                                 std::vector<StringRef> &Features) {
-  handleTargetFeaturesGroup(Args, Features, options::OPT_m_ppc_Features_Group);
-
-  ppc::FloatABI FloatABI = ppc::getPPCFloatABI(D, Args);
-  if (FloatABI == ppc::FloatABI::Soft)
-    Features.push_back("-hard-float");
-
-  // Altivec is a bit weird, allow overriding of the Altivec feature here.
-  AddTargetFeature(Args, Features, options::OPT_faltivec,
-                   options::OPT_fno_altivec, "altivec");
-}
-
-ppc::FloatABI ppc::getPPCFloatABI(const Driver &D, const ArgList &Args) {
-  ppc::FloatABI ABI = ppc::FloatABI::Invalid;
-  if (Arg *A =
-          Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float,
-                          options::OPT_mfloat_abi_EQ)) {
-    if (A->getOption().matches(options::OPT_msoft_float))
-      ABI = ppc::FloatABI::Soft;
-    else if (A->getOption().matches(options::OPT_mhard_float))
-      ABI = ppc::FloatABI::Hard;
-    else {
-      ABI = llvm::StringSwitch<ppc::FloatABI>(A->getValue())
-                .Case("soft", ppc::FloatABI::Soft)
-                .Case("hard", ppc::FloatABI::Hard)
-                .Default(ppc::FloatABI::Invalid);
-      if (ABI == ppc::FloatABI::Invalid && !StringRef(A->getValue()).empty()) {
-        D.Diag(diag::err_drv_invalid_mfloat_abi) << A->getAsString(Args);
-        ABI = ppc::FloatABI::Hard;
-      }
-    }
-  }
-
-  // If unspecified, choose the default based on the platform.
-  if (ABI == ppc::FloatABI::Invalid) {
-    ABI = ppc::FloatABI::Hard;
-  }
-
-  return ABI;
-}
-
-void Clang::AddPPCTargetArgs(const ArgList &Args,
-                             ArgStringList &CmdArgs) const {
-  // Select the ABI to use.
-  const char *ABIName = nullptr;
-  if (getToolChain().getTriple().isOSLinux())
-    switch (getToolChain().getArch()) {
-    case llvm::Triple::ppc64: {
-      // When targeting a processor that supports QPX, or if QPX is
-      // specifically enabled, default to using the ABI that supports QPX (so
-      // long as it is not specifically disabled).
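
// Aside: getPPCTargetCPU above is one large llvm::StringSwitch mapping
// user-facing -mcpu spellings onto canonical LLVM CPU names. The pattern in
// isolation (requires the LLVM ADT headers; the table is a small illustrative
// subset, not the full mapping):
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

static const char *canonicalPPCCPU(llvm::StringRef Name) {
  return llvm::StringSwitch<const char *>(Name)
      .Case("common", "generic") // GCC spelling -> LLVM spelling
      .Case("power7", "pwr7")
      .Case("G5", "g5")
      .Default(""); // unknown: caller substitutes a per-triple default
}
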
-      bool HasQPX = false;
-      if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ))
-        HasQPX = A->getValue() == StringRef("a2q");
-      HasQPX = Args.hasFlag(options::OPT_mqpx, options::OPT_mno_qpx, HasQPX);
-      if (HasQPX) {
-        ABIName = "elfv1-qpx";
-        break;
-      }
-
-      ABIName = "elfv1";
-      break;
-    }
-    case llvm::Triple::ppc64le:
-      ABIName = "elfv2";
-      break;
-    default:
-      break;
-    }
-
-  if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ))
-    // The ppc64 linux abis are all "altivec" abis by default. Accept and
-    // ignore the option if given as we don't have backend support for any
-    // targets that don't use the altivec abi.
-    if (StringRef(A->getValue()) != "altivec")
-      ABIName = A->getValue();
-
-  ppc::FloatABI FloatABI =
-      ppc::getPPCFloatABI(getToolChain().getDriver(), Args);
-
-  if (FloatABI == ppc::FloatABI::Soft) {
-    // Floating point operations and argument passing are soft.
-    CmdArgs.push_back("-msoft-float");
-    CmdArgs.push_back("-mfloat-abi");
-    CmdArgs.push_back("soft");
-  } else {
-    // Floating point operations and argument passing are hard.
-    assert(FloatABI == ppc::FloatABI::Hard && "Invalid float abi!");
-    CmdArgs.push_back("-mfloat-abi");
-    CmdArgs.push_back("hard");
-  }
-
-  if (ABIName) {
-    CmdArgs.push_back("-target-abi");
-    CmdArgs.push_back(ABIName);
-  }
-}
-
-bool ppc::hasPPCAbiArg(const ArgList &Args, const char *Value) {
-  Arg *A = Args.getLastArg(options::OPT_mabi_EQ);
-  return A && (A->getValue() == StringRef(Value));
-}
-
-/// Get the (LLVM) name of the R600 gpu we are targeting.
-static std::string getR600TargetGPU(const ArgList &Args) {
-  if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
-    const char *GPUName = A->getValue();
-    return llvm::StringSwitch<const char *>(GPUName)
-        .Cases("rv630", "rv635", "r600")
-        .Cases("rv610", "rv620", "rs780", "rs880")
-        .Case("rv740", "rv770")
-        .Case("palm", "cedar")
-        .Cases("sumo", "sumo2", "sumo")
-        .Case("hemlock", "cypress")
-        .Case("aruba", "cayman")
-        .Default(GPUName);
-  }
-  return "";
-}
-
-static std::string getLanaiTargetCPU(const ArgList &Args) {
-  if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
-    return A->getValue();
-  }
-  return "";
-}
-
-sparc::FloatABI sparc::getSparcFloatABI(const Driver &D,
-                                        const ArgList &Args) {
-  sparc::FloatABI ABI = sparc::FloatABI::Invalid;
-  if (Arg *A =
-          Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float,
-                          options::OPT_mfloat_abi_EQ)) {
-    if (A->getOption().matches(options::OPT_msoft_float))
-      ABI = sparc::FloatABI::Soft;
-    else if (A->getOption().matches(options::OPT_mhard_float))
-      ABI = sparc::FloatABI::Hard;
-    else {
-      ABI = llvm::StringSwitch<sparc::FloatABI>(A->getValue())
-                .Case("soft", sparc::FloatABI::Soft)
-                .Case("hard", sparc::FloatABI::Hard)
-                .Default(sparc::FloatABI::Invalid);
-      if (ABI == sparc::FloatABI::Invalid &&
-          !StringRef(A->getValue()).empty()) {
-        D.Diag(diag::err_drv_invalid_mfloat_abi) << A->getAsString(Args);
-        ABI = sparc::FloatABI::Hard;
-      }
-    }
-  }
-
-  // If unspecified, choose the default based on the platform.
-  // Only the hard-float ABI on Sparc is standardized, and it is the
-  // default. GCC also supports a nonstandard soft-float ABI mode, also
-  // implemented in LLVM. However as this is not standard we set the default
-  // to be hard-float.
-  if (ABI == sparc::FloatABI::Invalid) {
-    ABI = sparc::FloatABI::Hard;
-  }
-
-  return ABI;
-}
-
-static void getSparcTargetFeatures(const Driver &D, const ArgList &Args,
-                                   std::vector<StringRef> &Features) {
-  sparc::FloatABI FloatABI = sparc::getSparcFloatABI(D, Args);
-  if (FloatABI == sparc::FloatABI::Soft)
-    Features.push_back("+soft-float");
-}
-
-void Clang::AddSparcTargetArgs(const ArgList &Args,
-                               ArgStringList &CmdArgs) const {
-  sparc::FloatABI FloatABI =
-      sparc::getSparcFloatABI(getToolChain().getDriver(), Args);
-
-  if (FloatABI == sparc::FloatABI::Soft) {
-    // Floating point operations and argument passing are soft.
-    CmdArgs.push_back("-msoft-float");
-    CmdArgs.push_back("-mfloat-abi");
-    CmdArgs.push_back("soft");
-  } else {
-    // Floating point operations and argument passing are hard.
-    assert(FloatABI == sparc::FloatABI::Hard && "Invalid float abi!");
-    CmdArgs.push_back("-mfloat-abi");
-    CmdArgs.push_back("hard");
-  }
-}
-
-void Clang::AddSystemZTargetArgs(const ArgList &Args,
-                                 ArgStringList &CmdArgs) const {
-  if (Args.hasFlag(options::OPT_mbackchain, options::OPT_mno_backchain, false))
-    CmdArgs.push_back("-mbackchain");
-}
-
-static const char *getSystemZTargetCPU(const ArgList &Args) {
-  if (const Arg *A = Args.getLastArg(options::OPT_march_EQ))
-    return A->getValue();
-  return "z10";
-}
-
-static void getSystemZTargetFeatures(const ArgList &Args,
-                                     std::vector<StringRef> &Features) {
-  // -m(no-)htm overrides use of the transactional-execution facility.
-  if (Arg *A = Args.getLastArg(options::OPT_mhtm, options::OPT_mno_htm)) {
-    if (A->getOption().matches(options::OPT_mhtm))
-      Features.push_back("+transactional-execution");
-    else
-      Features.push_back("-transactional-execution");
-  }
-  // -m(no-)vx overrides use of the vector facility.
-  if (Arg *A = Args.getLastArg(options::OPT_mvx, options::OPT_mno_vx)) {
-    if (A->getOption().matches(options::OPT_mvx))
-      Features.push_back("+vector");
-    else
-      Features.push_back("-vector");
-  }
-}
-
-static const char *getX86TargetCPU(const ArgList &Args,
-                                   const llvm::Triple &Triple) {
-  if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) {
-    if (StringRef(A->getValue()) != "native") {
-      if (Triple.isOSDarwin() && Triple.getArchName() == "x86_64h")
-        return "core-avx2";
-
-      return A->getValue();
-    }
-
-    // FIXME: Reject attempts to use -march=native unless the target matches
-    // the host.
-    //
-    // FIXME: We should also incorporate the detected target features for use
-    // with -native.
-    std::string CPU = llvm::sys::getHostCPUName();
-    if (!CPU.empty() && CPU != "generic")
-      return Args.MakeArgString(CPU);
-  }
-
-  if (const Arg *A = Args.getLastArg(options::OPT__SLASH_arch)) {
-    // Mapping built by referring to X86TargetInfo::getDefaultFeatures().
-    StringRef Arch = A->getValue();
-    const char *CPU;
-    if (Triple.getArch() == llvm::Triple::x86) {
-      CPU = llvm::StringSwitch<const char *>(Arch)
-                .Case("IA32", "i386")
-                .Case("SSE", "pentium3")
-                .Case("SSE2", "pentium4")
-                .Case("AVX", "sandybridge")
-                .Case("AVX2", "haswell")
-                .Default(nullptr);
-    } else {
-      CPU = llvm::StringSwitch<const char *>(Arch)
-                .Case("AVX", "sandybridge")
-                .Case("AVX2", "haswell")
-                .Default(nullptr);
-    }
-    if (CPU)
-      return CPU;
-  }
-
-  // Select the default CPU if none was given (or detection failed).
-
-  if (Triple.getArch() != llvm::Triple::x86_64 &&
-      Triple.getArch() != llvm::Triple::x86)
-    return nullptr; // This routine is only handling x86 targets.
-
-  bool Is64Bit = Triple.getArch() == llvm::Triple::x86_64;
-
-  // FIXME: Need target hooks.
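
// Aside: the getSparcFloatABI/getPPCFloatABI helpers above share one shape:
// take the last of -msoft-float/-mhard-float/-mfloat-abi=, map it to an enum,
// diagnose bad values, and fall back to a platform default. A compact
// standalone model of that resolution order (illustrative, outside the
// Driver/ArgList API):
#include <string>
#include <vector>

enum class FloatABI { Invalid, Soft, Hard };

static FloatABI resolveFloatABI(const std::vector<std::string> &Argv) {
  FloatABI ABI = FloatABI::Invalid;
  for (const std::string &A : Argv) { // later flags override earlier ones
    if (A == "-msoft-float" || A == "-mfloat-abi=soft")
      ABI = FloatABI::Soft;
    else if (A == "-mhard-float" || A == "-mfloat-abi=hard")
      ABI = FloatABI::Hard;
  }
  // Unspecified: only the hard-float ABI is standardized, so default to it.
  return ABI == FloatABI::Invalid ? FloatABI::Hard : ABI;
}
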
- if (Triple.isOSDarwin()) { - if (Triple.getArchName() == "x86_64h") - return "core-avx2"; - return Is64Bit ? "core2" : "yonah"; - } - - // Set up default CPU name for PS4 compilers. - if (Triple.isPS4CPU()) - return "btver2"; - - // On Android use targets compatible with gcc - if (Triple.isAndroid()) - return Is64Bit ? "x86-64" : "i686"; - - // Everything else goes to x86-64 in 64-bit mode. - if (Is64Bit) - return "x86-64"; - - switch (Triple.getOS()) { - case llvm::Triple::FreeBSD: - case llvm::Triple::NetBSD: - case llvm::Triple::OpenBSD: - return "i486"; - case llvm::Triple::Haiku: - return "i586"; - case llvm::Triple::Bitrig: - return "i686"; - default: - // Fallback to p4. - return "pentium4"; - } -} - -/// Get the (LLVM) name of the WebAssembly cpu we are targeting. -static StringRef getWebAssemblyTargetCPU(const ArgList &Args) { - // If we have -mcpu=, use that. - if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) { - StringRef CPU = A->getValue(); - -#ifdef __wasm__ - // Handle "native" by examining the host. "native" isn't meaningful when - // cross compiling, so only support this when the host is also WebAssembly. - if (CPU == "native") - return llvm::sys::getHostCPUName(); -#endif - - return CPU; - } - - return "generic"; -} - -static StringRef getZ80TargetCPU(const ArgList &Args, - const llvm::Triple &Triple) { - if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) - return A->getValue(); - - // Select the default CPU if none was given (or detection failed). - return Triple.getArchName(); -} - -static std::string getCPUName(const ArgList &Args, const llvm::Triple &T, - bool FromAs = false) { - Arg *A; - - switch (T.getArch()) { - default: - return ""; - - case llvm::Triple::aarch64: - case llvm::Triple::aarch64_be: - return getAArch64TargetCPU(Args, A); - - case llvm::Triple::arm: - case llvm::Triple::armeb: - case llvm::Triple::thumb: - case llvm::Triple::thumbeb: { - StringRef MArch, MCPU; - getARMArchCPUFromArgs(Args, MArch, MCPU, FromAs); - return arm::getARMTargetCPU(MCPU, MArch, T); - } - case llvm::Triple::mips: - case llvm::Triple::mipsel: - case llvm::Triple::mips64: - case llvm::Triple::mips64el: { - StringRef CPUName; - StringRef ABIName; - mips::getMipsCPUAndABI(Args, T, CPUName, ABIName); - return CPUName; - } - - case llvm::Triple::nvptx: - case llvm::Triple::nvptx64: - if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) - return A->getValue(); - return ""; - - case llvm::Triple::ppc: - case llvm::Triple::ppc64: - case llvm::Triple::ppc64le: { - std::string TargetCPUName = getPPCTargetCPU(Args); - // LLVM may default to generating code for the native CPU, - // but, like gcc, we default to a more generic option for - // each architecture. 
(except on Darwin) - if (TargetCPUName.empty() && !T.isOSDarwin()) { - if (T.getArch() == llvm::Triple::ppc64) - TargetCPUName = "ppc64"; - else if (T.getArch() == llvm::Triple::ppc64le) - TargetCPUName = "ppc64le"; - else - TargetCPUName = "ppc"; - } - return TargetCPUName; - } - - case llvm::Triple::sparc: - case llvm::Triple::sparcel: - case llvm::Triple::sparcv9: - if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) - return A->getValue(); - return ""; - - case llvm::Triple::x86: - case llvm::Triple::x86_64: - return getX86TargetCPU(Args, T); - - case llvm::Triple::hexagon: - return "hexagon" + - toolchains::HexagonToolChain::GetTargetCPUVersion(Args).str(); - - case llvm::Triple::lanai: - return getLanaiTargetCPU(Args); - - case llvm::Triple::systemz: - return getSystemZTargetCPU(Args); - - case llvm::Triple::r600: - case llvm::Triple::amdgcn: - return getR600TargetGPU(Args); - - case llvm::Triple::wasm32: - case llvm::Triple::wasm64: - return getWebAssemblyTargetCPU(Args); - - case llvm::Triple::z80: - case llvm::Triple::ez80: - return getZ80TargetCPU(Args, T); - } -} - -static unsigned getLTOParallelism(const ArgList &Args, const Driver &D) { - unsigned Parallelism = 0; - Arg *LtoJobsArg = Args.getLastArg(options::OPT_flto_jobs_EQ); - if (LtoJobsArg && - StringRef(LtoJobsArg->getValue()).getAsInteger(10, Parallelism)) - D.Diag(diag::err_drv_invalid_int_value) << LtoJobsArg->getAsString(Args) - << LtoJobsArg->getValue(); - return Parallelism; -} - -// CloudABI and WebAssembly use -ffunction-sections and -fdata-sections by -// default. -static bool isUseSeparateSections(const llvm::Triple &Triple) { - return Triple.getOS() == llvm::Triple::CloudABI || - Triple.getArch() == llvm::Triple::wasm32 || - Triple.getArch() == llvm::Triple::wasm64; -} - -static void AddGoldPlugin(const ToolChain &ToolChain, const ArgList &Args, - ArgStringList &CmdArgs, bool IsThinLTO, - const Driver &D) { - // Tell the linker to load the plugin. This has to come before AddLinkerInputs - // as gold requires -plugin to come before any -plugin-opt that -Wl might - // forward. - CmdArgs.push_back("-plugin"); - std::string Plugin = - ToolChain.getDriver().Dir + "/../lib" CLANG_LIBDIR_SUFFIX "/LLVMgold.so"; - CmdArgs.push_back(Args.MakeArgString(Plugin)); - - // Try to pass driver level flags relevant to LTO code generation down to - // the plugin. - - // Handle flags for selecting CPU variants. - std::string CPU = getCPUName(Args, ToolChain.getTriple()); - if (!CPU.empty()) - CmdArgs.push_back(Args.MakeArgString(Twine("-plugin-opt=mcpu=") + CPU)); - - if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { - StringRef OOpt; - if (A->getOption().matches(options::OPT_O4) || - A->getOption().matches(options::OPT_Ofast)) - OOpt = "3"; - else if (A->getOption().matches(options::OPT_O)) - OOpt = A->getValue(); - else if (A->getOption().matches(options::OPT_O0)) - OOpt = "0"; - if (!OOpt.empty()) - CmdArgs.push_back(Args.MakeArgString(Twine("-plugin-opt=O") + OOpt)); - } - - if (IsThinLTO) - CmdArgs.push_back("-plugin-opt=thinlto"); - - if (unsigned Parallelism = getLTOParallelism(Args, D)) - CmdArgs.push_back(Args.MakeArgString(Twine("-plugin-opt=jobs=") + - llvm::to_string(Parallelism))); - - // If an explicit debugger tuning argument appeared, pass it along. 
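
// Aside: AddGoldPlugin above folds the driver's -O group down to the gold
// plugin's -plugin-opt=O<n> spelling, clamping -O4/-Ofast to 3. A standalone
// sketch of that mapping (illustrative; the bare "-O" form and diagnostics
// are omitted for brevity):
#include <string>

static std::string pluginOptForOptLevel(const std::string &OArg) {
  if (OArg == "-O4" || OArg == "-Ofast")
    return "-plugin-opt=O3"; // highest level the plugin understands
  if (OArg == "-O0")
    return "-plugin-opt=O0";
  if (OArg.size() == 3 && OArg.compare(0, 2, "-O") == 0)
    return "-plugin-opt=O" + OArg.substr(2); // -O1/-O2/-O3/-Os/-Oz
  return ""; // no recognized -O flag: add nothing
}
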
- if (Arg *A = Args.getLastArg(options::OPT_gTune_Group, - options::OPT_ggdbN_Group)) { - if (A->getOption().matches(options::OPT_glldb)) - CmdArgs.push_back("-plugin-opt=-debugger-tune=lldb"); - else if (A->getOption().matches(options::OPT_gsce)) - CmdArgs.push_back("-plugin-opt=-debugger-tune=sce"); - else - CmdArgs.push_back("-plugin-opt=-debugger-tune=gdb"); - } - - bool UseSeparateSections = - isUseSeparateSections(ToolChain.getEffectiveTriple()); - - if (Args.hasFlag(options::OPT_ffunction_sections, - options::OPT_fno_function_sections, UseSeparateSections)) { - CmdArgs.push_back("-plugin-opt=-function-sections"); - } - - if (Args.hasFlag(options::OPT_fdata_sections, options::OPT_fno_data_sections, - UseSeparateSections)) { - CmdArgs.push_back("-plugin-opt=-data-sections"); - } -} - -/// This is a helper function for validating the optional refinement step -/// parameter in reciprocal argument strings. Return false if there is an error -/// parsing the refinement step. Otherwise, return true and set the Position -/// of the refinement step in the input string. -static bool getRefinementStep(StringRef In, const Driver &D, - const Arg &A, size_t &Position) { - const char RefinementStepToken = ':'; - Position = In.find(RefinementStepToken); - if (Position != StringRef::npos) { - StringRef Option = A.getOption().getName(); - StringRef RefStep = In.substr(Position + 1); - // Allow exactly one numeric character for the additional refinement - // step parameter. This is reasonable for all currently-supported - // operations and architectures because we would expect that a larger value - // of refinement steps would cause the estimate "optimization" to - // under-perform the native operation. Also, if the estimate does not - // converge quickly, it probably will not ever converge, so further - // refinement steps will not produce a better answer. - if (RefStep.size() != 1) { - D.Diag(diag::err_drv_invalid_value) << Option << RefStep; - return false; - } - char RefStepChar = RefStep[0]; - if (RefStepChar < '0' || RefStepChar > '9') { - D.Diag(diag::err_drv_invalid_value) << Option << RefStep; - return false; - } - } - return true; -} - -/// The -mrecip flag requires processing of many optional parameters. -static void ParseMRecip(const Driver &D, const ArgList &Args, - ArgStringList &OutStrings) { - StringRef DisabledPrefixIn = "!"; - StringRef DisabledPrefixOut = "!"; - StringRef EnabledPrefixOut = ""; - StringRef Out = "-mrecip="; - - Arg *A = Args.getLastArg(options::OPT_mrecip, options::OPT_mrecip_EQ); - if (!A) - return; - - unsigned NumOptions = A->getNumValues(); - if (NumOptions == 0) { - // No option is the same as "all". - OutStrings.push_back(Args.MakeArgString(Out + "all")); - return; - } - - // Pass through "all", "none", or "default" with an optional refinement step. - if (NumOptions == 1) { - StringRef Val = A->getValue(0); - size_t RefStepLoc; - if (!getRefinementStep(Val, D, *A, RefStepLoc)) - return; - StringRef ValBase = Val.slice(0, RefStepLoc); - if (ValBase == "all" || ValBase == "none" || ValBase == "default") { - OutStrings.push_back(Args.MakeArgString(Out + Val)); - return; - } - } - - // Each reciprocal type may be enabled or disabled individually. - // Check each input value for validity, concatenate them all back together, - // and pass through. 
-
-  llvm::StringMap<bool> OptionStrings;
-  OptionStrings.insert(std::make_pair("divd", false));
-  OptionStrings.insert(std::make_pair("divf", false));
-  OptionStrings.insert(std::make_pair("vec-divd", false));
-  OptionStrings.insert(std::make_pair("vec-divf", false));
-  OptionStrings.insert(std::make_pair("sqrtd", false));
-  OptionStrings.insert(std::make_pair("sqrtf", false));
-  OptionStrings.insert(std::make_pair("vec-sqrtd", false));
-  OptionStrings.insert(std::make_pair("vec-sqrtf", false));
-
-  for (unsigned i = 0; i != NumOptions; ++i) {
-    StringRef Val = A->getValue(i);
-
-    bool IsDisabled = Val.startswith(DisabledPrefixIn);
-    // Ignore the disablement token for string matching.
-    if (IsDisabled)
-      Val = Val.substr(1);
-
-    size_t RefStep;
-    if (!getRefinementStep(Val, D, *A, RefStep))
-      return;
-
-    StringRef ValBase = Val.slice(0, RefStep);
-    llvm::StringMap<bool>::iterator OptionIter = OptionStrings.find(ValBase);
-    if (OptionIter == OptionStrings.end()) {
-      // Try again specifying float suffix.
-      OptionIter = OptionStrings.find(ValBase.str() + 'f');
-      if (OptionIter == OptionStrings.end()) {
-        // The input name did not match any known option string.
-        D.Diag(diag::err_drv_unknown_argument) << Val;
-        return;
-      }
-      // The option was specified without a float or double suffix.
-      // Make sure that the double entry was not already specified.
-      // The float entry will be checked below.
-      if (OptionStrings[ValBase.str() + 'd']) {
-        D.Diag(diag::err_drv_invalid_value) << A->getOption().getName() << Val;
-        return;
-      }
-    }
-
-    if (OptionIter->second == true) {
-      // Duplicate option specified.
-      D.Diag(diag::err_drv_invalid_value) << A->getOption().getName() << Val;
-      return;
-    }
-
-    // Mark the matched option as found. Do not allow duplicate specifiers.
-    OptionIter->second = true;
-
-    // If the precision was not specified, also mark the double entry as found.
-    if (ValBase.back() != 'f' && ValBase.back() != 'd')
-      OptionStrings[ValBase.str() + 'd'] = true;
-
-    // Build the output string.
-    StringRef Prefix = IsDisabled ? DisabledPrefixOut : EnabledPrefixOut;
-    Out = Args.MakeArgString(Out + Prefix + Val);
-    if (i != NumOptions - 1)
-      Out = Args.MakeArgString(Out + ",");
-  }
-
-  OutStrings.push_back(Args.MakeArgString(Out));
-}
-
-static void getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
-                                 const ArgList &Args,
-                                 std::vector<StringRef> &Features) {
-  // If -march=native, autodetect the feature list.
-  if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) {
-    if (StringRef(A->getValue()) == "native") {
-      llvm::StringMap<bool> HostFeatures;
-      if (llvm::sys::getHostCPUFeatures(HostFeatures))
-        for (auto &F : HostFeatures)
-          Features.push_back(
-              Args.MakeArgString((F.second ? "+" : "-") + F.first()));
-    }
-  }
-
-  if (Triple.getArchName() == "x86_64h") {
-    // x86_64h implies quite a few of the more modern subtarget features
-    // for Haswell class CPUs, but not all of them. Opt-out of a few.
-    Features.push_back("-rdrnd");
-    Features.push_back("-aes");
-    Features.push_back("-pclmul");
-    Features.push_back("-rtm");
-    Features.push_back("-hle");
-    Features.push_back("-fsgsbase");
-  }
-
-  const llvm::Triple::ArchType ArchType = Triple.getArch();
-  // Add features to be compatible with gcc for Android.
-  if (Triple.isAndroid()) {
-    if (ArchType == llvm::Triple::x86_64) {
-      Features.push_back("+sse4.2");
-      Features.push_back("+popcnt");
-    } else
-      Features.push_back("+ssse3");
-  }
-
-  // Set features according to the -arch flag on MSVC.
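
// Aside: ParseMRecip above accepts entries such as "vec-sqrtf:2", where the
// optional ":N" suffix is a single-digit refinement-step count. The
// split-and-validate step in isolation (standalone sketch; diagnostics are
// reduced to a bool):
#include <cctype>
#include <string>

static bool splitRecipEntry(const std::string &In, std::string &Base,
                            int &Steps) {
  std::string::size_type Colon = In.find(':');
  if (Colon == std::string::npos) {
    Base = In;
    Steps = -1; // no explicit refinement step requested
    return true;
  }
  std::string Suffix = In.substr(Colon + 1);
  if (Suffix.size() != 1 || !isdigit((unsigned char)Suffix[0]))
    return false; // exactly one decimal digit is allowed
  Base = In.substr(0, Colon);
  Steps = Suffix[0] - '0';
  return true;
}
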
- if (Arg *A = Args.getLastArg(options::OPT__SLASH_arch)) { - StringRef Arch = A->getValue(); - bool ArchUsed = false; - // First, look for flags that are shared in x86 and x86-64. - if (ArchType == llvm::Triple::x86_64 || ArchType == llvm::Triple::x86) { - if (Arch == "AVX" || Arch == "AVX2") { - ArchUsed = true; - Features.push_back(Args.MakeArgString("+" + Arch.lower())); - } - } - // Then, look for x86-specific flags. - if (ArchType == llvm::Triple::x86) { - if (Arch == "IA32") { - ArchUsed = true; - } else if (Arch == "SSE" || Arch == "SSE2") { - ArchUsed = true; - Features.push_back(Args.MakeArgString("+" + Arch.lower())); - } - } - if (!ArchUsed) - D.Diag(clang::diag::warn_drv_unused_argument) << A->getAsString(Args); - } - - // Now add any that the user explicitly requested on the command line, - // which may override the defaults. - handleTargetFeaturesGroup(Args, Features, options::OPT_m_x86_Features_Group); -} - -void Clang::AddX86TargetArgs(const ArgList &Args, - ArgStringList &CmdArgs) const { - if (!Args.hasFlag(options::OPT_mred_zone, options::OPT_mno_red_zone, true) || - Args.hasArg(options::OPT_mkernel) || - Args.hasArg(options::OPT_fapple_kext)) - CmdArgs.push_back("-disable-red-zone"); - - // Default to avoid implicit floating-point for kernel/kext code, but allow - // that to be overridden with -mno-soft-float. - bool NoImplicitFloat = (Args.hasArg(options::OPT_mkernel) || - Args.hasArg(options::OPT_fapple_kext)); - if (Arg *A = Args.getLastArg( - options::OPT_msoft_float, options::OPT_mno_soft_float, - options::OPT_mimplicit_float, options::OPT_mno_implicit_float)) { - const Option &O = A->getOption(); - NoImplicitFloat = (O.matches(options::OPT_mno_implicit_float) || - O.matches(options::OPT_msoft_float)); - } - if (NoImplicitFloat) - CmdArgs.push_back("-no-implicit-float"); - - if (Arg *A = Args.getLastArg(options::OPT_masm_EQ)) { - StringRef Value = A->getValue(); - if (Value == "intel" || Value == "att") { - CmdArgs.push_back("-mllvm"); - CmdArgs.push_back(Args.MakeArgString("-x86-asm-syntax=" + Value)); - } else { - getToolChain().getDriver().Diag(diag::err_drv_unsupported_option_argument) - << A->getOption().getName() << Value; - } - } - - // Set flags to support MCU ABI. 
-  if (Args.hasFlag(options::OPT_miamcu, options::OPT_mno_iamcu, false)) {
-    CmdArgs.push_back("-mfloat-abi");
-    CmdArgs.push_back("soft");
-    CmdArgs.push_back("-mstack-alignment=4");
-  }
-}
-
-void Clang::AddHexagonTargetArgs(const ArgList &Args,
-                                 ArgStringList &CmdArgs) const {
-  CmdArgs.push_back("-mqdsp6-compat");
-  CmdArgs.push_back("-Wreturn-type");
-
-  if (auto G = toolchains::HexagonToolChain::getSmallDataThreshold(Args)) {
-    std::string N = llvm::utostr(G.getValue());
-    std::string Opt = std::string("-hexagon-small-data-threshold=") + N;
-    CmdArgs.push_back("-mllvm");
-    CmdArgs.push_back(Args.MakeArgString(Opt));
-  }
-
-  if (!Args.hasArg(options::OPT_fno_short_enums))
-    CmdArgs.push_back("-fshort-enums");
-  if (Args.getLastArg(options::OPT_mieee_rnd_near)) {
-    CmdArgs.push_back("-mllvm");
-    CmdArgs.push_back("-enable-hexagon-ieee-rnd-near");
-  }
-  CmdArgs.push_back("-mllvm");
-  CmdArgs.push_back("-machine-sink-split=0");
-}
-
-void Clang::AddLanaiTargetArgs(const ArgList &Args,
-                               ArgStringList &CmdArgs) const {
-  if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
-    StringRef CPUName = A->getValue();
-
-    CmdArgs.push_back("-target-cpu");
-    CmdArgs.push_back(Args.MakeArgString(CPUName));
-  }
-  if (Arg *A = Args.getLastArg(options::OPT_mregparm_EQ)) {
-    StringRef Value = A->getValue();
-    // Only support mregparm=4 to support old usage. Report error for all other
-    // cases.
-    int Mregparm;
-    if (Value.getAsInteger(10, Mregparm)) {
-      if (Mregparm != 4) {
-        getToolChain().getDriver().Diag(
-            diag::err_drv_unsupported_option_argument)
-            << A->getOption().getName() << Value;
-      }
-    }
-  }
-}
-
-void Clang::AddWebAssemblyTargetArgs(const ArgList &Args,
-                                     ArgStringList &CmdArgs) const {
-  // Default to "hidden" visibility.
-  if (!Args.hasArg(options::OPT_fvisibility_EQ,
-                   options::OPT_fvisibility_ms_compat)) {
-    CmdArgs.push_back("-fvisibility");
-    CmdArgs.push_back("hidden");
-  }
-}
-
-// Decode AArch64 features from string like +[no]featureA+[no]featureB+...
-static bool DecodeAArch64Features(const Driver &D, StringRef text,
-                                  std::vector<StringRef> &Features) {
-  SmallVector<StringRef, 8> Split;
-  text.split(Split, StringRef("+"), -1, false);
-
-  for (StringRef Feature : Split) {
-    StringRef FeatureName = llvm::AArch64::getArchExtFeature(Feature);
-    if (!FeatureName.empty())
-      Features.push_back(FeatureName);
-    else if (Feature == "neon" || Feature == "noneon")
-      D.Diag(diag::err_drv_no_neon_modifier);
-    else
-      return false;
-  }
-  return true;
-}
-
-// Check if the CPU name and feature modifiers in -mcpu are legal. If yes,
-// decode CPU and feature.
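
// Aside: DecodeAArch64Features above walks a '+'-separated modifier string
// such as "crc+nocrypto". The splitting step in isolation (standalone sketch;
// the real code then maps each token through
// llvm::AArch64::getArchExtFeature):
#include <string>
#include <vector>

static std::vector<std::string> splitArchModifiers(const std::string &Text) {
  std::vector<std::string> Out;
  std::string::size_type Start = 0;
  while (Start < Text.size()) {
    std::string::size_type Plus = Text.find('+', Start);
    if (Plus == std::string::npos)
      Plus = Text.size();
    if (Plus > Start) // drop empty pieces, as split(..., false) above does
      Out.push_back(Text.substr(Start, Plus - Start));
    Start = Plus + 1;
  }
  return Out;
}
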
-static bool DecodeAArch64Mcpu(const Driver &D, StringRef Mcpu, StringRef &CPU,
-                              std::vector<StringRef> &Features) {
-  std::pair<StringRef, StringRef> Split = Mcpu.split("+");
-  CPU = Split.first;
-
-  if (CPU == "generic") {
-    Features.push_back("+neon");
-  } else {
-    unsigned ArchKind = llvm::AArch64::parseCPUArch(CPU);
-    if (!llvm::AArch64::getArchFeatures(ArchKind, Features))
-      return false;
-
-    unsigned Extension = llvm::AArch64::getDefaultExtensions(CPU, ArchKind);
-    if (!llvm::AArch64::getExtensionFeatures(Extension, Features))
-      return false;
-  }
-
-  if (Split.second.size() && !DecodeAArch64Features(D, Split.second, Features))
-    return false;
-
-  return true;
-}
-
-static bool
-getAArch64ArchFeaturesFromMarch(const Driver &D, StringRef March,
-                                const ArgList &Args,
-                                std::vector<StringRef> &Features) {
-  std::string MarchLowerCase = March.lower();
-  std::pair<StringRef, StringRef> Split = StringRef(MarchLowerCase).split("+");
-
-  unsigned ArchKind = llvm::AArch64::parseArch(Split.first);
-  if (ArchKind == static_cast<unsigned>(llvm::AArch64::ArchKind::AK_INVALID) ||
-      !llvm::AArch64::getArchFeatures(ArchKind, Features) ||
-      (Split.second.size() &&
-       !DecodeAArch64Features(D, Split.second, Features)))
-    return false;
-
-  return true;
-}
-
-static bool
-getAArch64ArchFeaturesFromMcpu(const Driver &D, StringRef Mcpu,
-                               const ArgList &Args,
-                               std::vector<StringRef> &Features) {
-  StringRef CPU;
-  std::string McpuLowerCase = Mcpu.lower();
-  if (!DecodeAArch64Mcpu(D, McpuLowerCase, CPU, Features))
-    return false;
-
-  return true;
-}
-
-static bool
-getAArch64MicroArchFeaturesFromMtune(const Driver &D, StringRef Mtune,
-                                     const ArgList &Args,
-                                     std::vector<StringRef> &Features) {
-  std::string MtuneLowerCase = Mtune.lower();
-  // Handle the case where the CPU name is 'native'.
-  if (MtuneLowerCase == "native")
-    MtuneLowerCase = llvm::sys::getHostCPUName();
-  if (MtuneLowerCase == "cyclone") {
-    Features.push_back("+zcm");
-    Features.push_back("+zcz");
-  }
-  return true;
-}
-
-static bool
-getAArch64MicroArchFeaturesFromMcpu(const Driver &D, StringRef Mcpu,
-                                    const ArgList &Args,
-                                    std::vector<StringRef> &Features) {
-  StringRef CPU;
-  std::vector<StringRef> DecodedFeature;
-  std::string McpuLowerCase = Mcpu.lower();
-  if (!DecodeAArch64Mcpu(D, McpuLowerCase, CPU, DecodedFeature))
-    return false;
-
-  return getAArch64MicroArchFeaturesFromMtune(D, CPU, Args, Features);
-}
-
-static void getAArch64TargetFeatures(const Driver &D, const ArgList &Args,
-                                     std::vector<StringRef> &Features) {
-  Arg *A;
-  bool success = true;
-  // Enable NEON by default.
-  Features.push_back("+neon");
-  if ((A = Args.getLastArg(options::OPT_march_EQ)))
-    success = getAArch64ArchFeaturesFromMarch(D, A->getValue(), Args, Features);
-  else if ((A = Args.getLastArg(options::OPT_mcpu_EQ)))
-    success = getAArch64ArchFeaturesFromMcpu(D, A->getValue(), Args, Features);
-  else if (Args.hasArg(options::OPT_arch))
-    success = getAArch64ArchFeaturesFromMcpu(D, getAArch64TargetCPU(Args, A),
-                                             Args, Features);
-
-  if (success && (A = Args.getLastArg(options::OPT_mtune_EQ)))
-    success =
-        getAArch64MicroArchFeaturesFromMtune(D, A->getValue(), Args, Features);
-  else if (success && (A = Args.getLastArg(options::OPT_mcpu_EQ)))
-    success =
-        getAArch64MicroArchFeaturesFromMcpu(D, A->getValue(), Args, Features);
-  else if (success && Args.hasArg(options::OPT_arch))
-    success = getAArch64MicroArchFeaturesFromMcpu(
-        D, getAArch64TargetCPU(Args, A), Args, Features);
-
-  if (!success)
-    D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args);
-
-  if (Args.getLastArg(options::OPT_mgeneral_regs_only)) {
-    Features.push_back("-fp-armv8");
-    Features.push_back("-crypto");
-    Features.push_back("-neon");
-  }
-
-  // Enable or disable CRC.
-  if (Arg *A = Args.getLastArg(options::OPT_mcrc, options::OPT_mnocrc)) {
-    if (A->getOption().matches(options::OPT_mcrc))
-      Features.push_back("+crc");
-    else
-      Features.push_back("-crc");
-  }
-
-  if (Arg *A = Args.getLastArg(options::OPT_mno_unaligned_access,
-                               options::OPT_munaligned_access))
-    if (A->getOption().matches(options::OPT_mno_unaligned_access))
-      Features.push_back("+strict-align");
-
-  if (Args.hasArg(options::OPT_ffixed_x18))
-    Features.push_back("+reserve-x18");
-}
-
-static void getHexagonTargetFeatures(const ArgList &Args,
-                                     std::vector<StringRef> &Features) {
-  handleTargetFeaturesGroup(Args, Features,
-                            options::OPT_m_hexagon_Features_Group);
-
-  bool UseLongCalls = false;
-  if (Arg *A = Args.getLastArg(options::OPT_mlong_calls,
-                               options::OPT_mno_long_calls)) {
-    if (A->getOption().matches(options::OPT_mlong_calls))
-      UseLongCalls = true;
-  }
-
-  Features.push_back(UseLongCalls ? "+long-calls" : "-long-calls");
"+long-calls" : "-long-calls"); -} - -static void getWebAssemblyTargetFeatures(const ArgList &Args, - std::vector &Features) { - handleTargetFeaturesGroup(Args, Features, options::OPT_m_wasm_Features_Group); -} - -static void getAMDGPUTargetFeatures(const Driver &D, const ArgList &Args, - std::vector &Features) { - if (const Arg *dAbi = Args.getLastArg(options::OPT_mamdgpu_debugger_abi)) { - StringRef value = dAbi->getValue(); - if (value == "1.0") { - Features.push_back("+amdgpu-debugger-insert-nops"); - Features.push_back("+amdgpu-debugger-reserve-regs"); - Features.push_back("+amdgpu-debugger-emit-prologue"); - } else { - D.Diag(diag::err_drv_clang_unsupported) << dAbi->getAsString(Args); - } - } - - handleTargetFeaturesGroup( - Args, Features, options::OPT_m_amdgpu_Features_Group); -} - -static void getTargetFeatures(const ToolChain &TC, const llvm::Triple &Triple, - const ArgList &Args, ArgStringList &CmdArgs, - bool ForAS) { - const Driver &D = TC.getDriver(); - std::vector Features; - switch (Triple.getArch()) { - default: - break; - case llvm::Triple::mips: - case llvm::Triple::mipsel: - case llvm::Triple::mips64: - case llvm::Triple::mips64el: - getMIPSTargetFeatures(D, Triple, Args, Features); - break; - - case llvm::Triple::arm: - case llvm::Triple::armeb: - case llvm::Triple::thumb: - case llvm::Triple::thumbeb: - getARMTargetFeatures(TC, Triple, Args, CmdArgs, Features, ForAS); - break; - - case llvm::Triple::ppc: - case llvm::Triple::ppc64: - case llvm::Triple::ppc64le: - getPPCTargetFeatures(D, Triple, Args, Features); - break; - case llvm::Triple::systemz: - getSystemZTargetFeatures(Args, Features); - break; - case llvm::Triple::aarch64: - case llvm::Triple::aarch64_be: - getAArch64TargetFeatures(D, Args, Features); - break; - case llvm::Triple::x86: - case llvm::Triple::x86_64: - getX86TargetFeatures(D, Triple, Args, Features); - break; - case llvm::Triple::hexagon: - getHexagonTargetFeatures(Args, Features); - break; - case llvm::Triple::wasm32: - case llvm::Triple::wasm64: - getWebAssemblyTargetFeatures(Args, Features); - break; - case llvm::Triple::sparc: - case llvm::Triple::sparcel: - case llvm::Triple::sparcv9: - getSparcTargetFeatures(D, Args, Features); - break; - case llvm::Triple::r600: - case llvm::Triple::amdgcn: - getAMDGPUTargetFeatures(D, Args, Features); - break; - } - - // Find the last of each feature. - llvm::StringMap LastOpt; - for (unsigned I = 0, N = Features.size(); I < N; ++I) { - StringRef Name = Features[I]; - assert(Name[0] == '-' || Name[0] == '+'); - LastOpt[Name.drop_front(1)] = I; - } - - for (unsigned I = 0, N = Features.size(); I < N; ++I) { - // If this feature was overridden, ignore it. - StringRef Name = Features[I]; - llvm::StringMap::iterator LastI = LastOpt.find(Name.drop_front(1)); - assert(LastI != LastOpt.end()); - unsigned Last = LastI->second; - if (Last != I) - continue; - - CmdArgs.push_back("-target-feature"); - CmdArgs.push_back(Name.data()); - } -} - -static bool -shouldUseExceptionTablesForObjCExceptions(const ObjCRuntime &runtime, - const llvm::Triple &Triple) { - // We use the zero-cost exception tables for Objective-C if the non-fragile - // ABI is enabled or when compiling for x86_64 and ARM on Snow Leopard and - // later. 
- if (runtime.isNonFragile()) - return true; - - if (!Triple.isMacOSX()) - return false; - - return (!Triple.isMacOSXVersionLT(10, 5) && - (Triple.getArch() == llvm::Triple::x86_64 || - Triple.getArch() == llvm::Triple::arm)); -} - -/// Adds exception related arguments to the driver command arguments. There's a -/// master flag, -fexceptions and also language specific flags to enable/disable -/// C++ and Objective-C exceptions. This makes it possible to for example -/// disable C++ exceptions but enable Objective-C exceptions. -static void addExceptionArgs(const ArgList &Args, types::ID InputType, - const ToolChain &TC, bool KernelOrKext, - const ObjCRuntime &objcRuntime, - ArgStringList &CmdArgs) { - const Driver &D = TC.getDriver(); - const llvm::Triple &Triple = TC.getTriple(); - - if (KernelOrKext) { - // -mkernel and -fapple-kext imply no exceptions, so claim exception related - // arguments now to avoid warnings about unused arguments. - Args.ClaimAllArgs(options::OPT_fexceptions); - Args.ClaimAllArgs(options::OPT_fno_exceptions); - Args.ClaimAllArgs(options::OPT_fobjc_exceptions); - Args.ClaimAllArgs(options::OPT_fno_objc_exceptions); - Args.ClaimAllArgs(options::OPT_fcxx_exceptions); - Args.ClaimAllArgs(options::OPT_fno_cxx_exceptions); - return; - } - - // See if the user explicitly enabled exceptions. - bool EH = Args.hasFlag(options::OPT_fexceptions, options::OPT_fno_exceptions, - false); - - // Obj-C exceptions are enabled by default, regardless of -fexceptions. This - // is not necessarily sensible, but follows GCC. - if (types::isObjC(InputType) && - Args.hasFlag(options::OPT_fobjc_exceptions, - options::OPT_fno_objc_exceptions, true)) { - CmdArgs.push_back("-fobjc-exceptions"); - - EH |= shouldUseExceptionTablesForObjCExceptions(objcRuntime, Triple); - } - - if (types::isCXX(InputType)) { - // Disable C++ EH by default on XCore and PS4. - bool CXXExceptionsEnabled = - Triple.getArch() != llvm::Triple::xcore && !Triple.isPS4CPU(); - Arg *ExceptionArg = Args.getLastArg( - options::OPT_fcxx_exceptions, options::OPT_fno_cxx_exceptions, - options::OPT_fexceptions, options::OPT_fno_exceptions); - if (ExceptionArg) - CXXExceptionsEnabled = - ExceptionArg->getOption().matches(options::OPT_fcxx_exceptions) || - ExceptionArg->getOption().matches(options::OPT_fexceptions); - - if (CXXExceptionsEnabled) { - if (Triple.isPS4CPU()) { - ToolChain::RTTIMode RTTIMode = TC.getRTTIMode(); - assert(ExceptionArg && - "On the PS4 exceptions should only be enabled if passing " - "an argument"); - if (RTTIMode == ToolChain::RM_DisabledExplicitly) { - const Arg *RTTIArg = TC.getRTTIArg(); - assert(RTTIArg && "RTTI disabled explicitly but no RTTIArg!"); - D.Diag(diag::err_drv_argument_not_allowed_with) - << RTTIArg->getAsString(Args) << ExceptionArg->getAsString(Args); - } else if (RTTIMode == ToolChain::RM_EnabledImplicitly) - D.Diag(diag::warn_drv_enabling_rtti_with_exceptions); - } else - assert(TC.getRTTIMode() != ToolChain::RM_DisabledImplicitly); - - CmdArgs.push_back("-fcxx-exceptions"); - - EH = true; - } - } - - if (EH) - CmdArgs.push_back("-fexceptions"); -} - -static bool ShouldDisableAutolink(const ArgList &Args, const ToolChain &TC) { - bool Default = true; - if (TC.getTriple().isOSDarwin()) { - // The native darwin assembler doesn't support the linker_option directives, - // so we disable them if we think the .s file will be passed to it. 
-    Default = TC.useIntegratedAs();
-  }
-  return !Args.hasFlag(options::OPT_fautolink, options::OPT_fno_autolink,
-                       Default);
-}
-
-static bool ShouldDisableDwarfDirectory(const ArgList &Args,
-                                        const ToolChain &TC) {
-  bool UseDwarfDirectory =
-      Args.hasFlag(options::OPT_fdwarf_directory_asm,
-                   options::OPT_fno_dwarf_directory_asm, TC.useIntegratedAs());
-  return !UseDwarfDirectory;
-}
-
-/// \brief Check whether the given input tree contains any compilation actions.
-static bool ContainsCompileAction(const Action *A) {
-  if (isa<CompileJobAction>(A) || isa<BackendJobAction>(A))
-    return true;
-
-  for (const auto &AI : A->inputs())
-    if (ContainsCompileAction(AI))
-      return true;
-
-  return false;
-}
-
-/// \brief Check if -relax-all should be passed to the internal assembler.
-/// This is done by default when compiling non-assembler source with -O0.
-static bool UseRelaxAll(Compilation &C, const ArgList &Args) {
-  bool RelaxDefault = true;
-
-  if (Arg *A = Args.getLastArg(options::OPT_O_Group))
-    RelaxDefault = A->getOption().matches(options::OPT_O0);
-
-  if (RelaxDefault) {
-    RelaxDefault = false;
-    for (const auto &Act : C.getActions()) {
-      if (ContainsCompileAction(Act)) {
-        RelaxDefault = true;
-        break;
-      }
-    }
-  }
-
-  return Args.hasFlag(options::OPT_mrelax_all, options::OPT_mno_relax_all,
-                      RelaxDefault);
-}
-
-// Convert an arg of the form "-gN" or "-ggdbN" or one of their aliases
-// to the corresponding DebugInfoKind.
-static codegenoptions::DebugInfoKind DebugLevelToInfoKind(const Arg &A) {
-  assert(A.getOption().matches(options::OPT_gN_Group) &&
-         "Not a -g option that specifies a debug-info level");
-  if (A.getOption().matches(options::OPT_g0) ||
-      A.getOption().matches(options::OPT_ggdb0))
-    return codegenoptions::NoDebugInfo;
-  if (A.getOption().matches(options::OPT_gline_tables_only) ||
-      A.getOption().matches(options::OPT_ggdb1))
-    return codegenoptions::DebugLineTablesOnly;
-  return codegenoptions::LimitedDebugInfo;
-}
-
-// Extract the integer N from a string spelled "-dwarf-N", returning 0
-// on mismatch. The StringRef input (rather than an Arg) allows
-// for use by the "-Xassembler" option parser.
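
// Aside: ContainsCompileAction above is a plain recursive any-of walk over
// the action DAG. The same shape on a generic tree (standalone sketch; Node
// is a stand-in for clang's Action):
#include <vector>

struct Node {
  bool IsCompile = false;
  std::vector<const Node *> Inputs;
};

static bool containsCompile(const Node *N) {
  if (N->IsCompile)
    return true;
  for (const Node *Child : N->Inputs)
    if (containsCompile(Child))
      return true;
  return false;
}
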
-static unsigned DwarfVersionNum(StringRef ArgValue) {
-  return llvm::StringSwitch<unsigned>(ArgValue)
-      .Case("-gdwarf-2", 2)
-      .Case("-gdwarf-3", 3)
-      .Case("-gdwarf-4", 4)
-      .Case("-gdwarf-5", 5)
-      .Default(0);
-}
-
-static void RenderDebugEnablingArgs(const ArgList &Args, ArgStringList &CmdArgs,
-                                    codegenoptions::DebugInfoKind DebugInfoKind,
-                                    unsigned DwarfVersion,
-                                    llvm::DebuggerKind DebuggerTuning) {
-  switch (DebugInfoKind) {
-  case codegenoptions::DebugLineTablesOnly:
-    CmdArgs.push_back("-debug-info-kind=line-tables-only");
-    break;
-  case codegenoptions::LimitedDebugInfo:
-    CmdArgs.push_back("-debug-info-kind=limited");
-    break;
-  case codegenoptions::FullDebugInfo:
-    CmdArgs.push_back("-debug-info-kind=standalone");
-    break;
-  default:
-    break;
-  }
-  if (DwarfVersion > 0)
-    CmdArgs.push_back(
-        Args.MakeArgString("-dwarf-version=" + Twine(DwarfVersion)));
-  switch (DebuggerTuning) {
-  case llvm::DebuggerKind::GDB:
-    CmdArgs.push_back("-debugger-tuning=gdb");
-    break;
-  case llvm::DebuggerKind::LLDB:
-    CmdArgs.push_back("-debugger-tuning=lldb");
-    break;
-  case llvm::DebuggerKind::SCE:
-    CmdArgs.push_back("-debugger-tuning=sce");
-    break;
-  default:
-    break;
-  }
-}
-
-static void CollectArgsForIntegratedAssembler(Compilation &C,
-                                              const ArgList &Args,
-                                              ArgStringList &CmdArgs,
-                                              const Driver &D) {
-  if (UseRelaxAll(C, Args))
-    CmdArgs.push_back("-mrelax-all");
-
-  // Only default to -mincremental-linker-compatible if we think we are
-  // targeting the MSVC linker.
-  bool DefaultIncrementalLinkerCompatible =
-      C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment();
-  if (Args.hasFlag(options::OPT_mincremental_linker_compatible,
-                   options::OPT_mno_incremental_linker_compatible,
-                   DefaultIncrementalLinkerCompatible))
-    CmdArgs.push_back("-mincremental-linker-compatible");
-
-  switch (C.getDefaultToolChain().getArch()) {
-  case llvm::Triple::arm:
-  case llvm::Triple::armeb:
-  case llvm::Triple::thumb:
-  case llvm::Triple::thumbeb:
-    if (Arg *A = Args.getLastArg(options::OPT_mimplicit_it_EQ)) {
-      StringRef Value = A->getValue();
-      if (Value == "always" || Value == "never" || Value == "arm" ||
-          Value == "thumb") {
-        CmdArgs.push_back("-mllvm");
-        CmdArgs.push_back(Args.MakeArgString("-arm-implicit-it=" + Value));
-      } else {
-        D.Diag(diag::err_drv_unsupported_option_argument)
-            << A->getOption().getName() << Value;
-      }
-    }
-    break;
-  default:
-    break;
-  }
-
-  // When passing -I arguments to the assembler we sometimes need to
-  // unconditionally take the next argument. For example, when parsing
-  // '-Wa,-I -Wa,foo' we need to accept the -Wa,foo arg after seeing the
-  // -Wa,-I arg and when parsing '-Wa,-I,foo' we need to accept the 'foo'
-  // arg after parsing the '-I' arg.
-  bool TakeNextArg = false;
-
-  // When using an integrated assembler, translate -Wa, and -Xassembler
-  // options.
-  bool CompressDebugSections = false;
-
-  bool UseRelaxRelocations = ENABLE_X86_RELAX_RELOCATIONS;
-  const char *MipsTargetFeature = nullptr;
-  for (const Arg *A :
-       Args.filtered(options::OPT_Wa_COMMA, options::OPT_Xassembler)) {
-    A->claim();
-
-    for (StringRef Value : A->getValues()) {
-      if (TakeNextArg) {
-        CmdArgs.push_back(Value.data());
-        TakeNextArg = false;
-        continue;
-      }
-
-      switch (C.getDefaultToolChain().getArch()) {
-      default:
-        break;
-      case llvm::Triple::mips:
-      case llvm::Triple::mipsel:
-      case llvm::Triple::mips64:
-      case llvm::Triple::mips64el:
-        if (Value == "--trap") {
-          CmdArgs.push_back("-target-feature");
-          CmdArgs.push_back("+use-tcc-in-div");
-          continue;
-        }
-        if (Value == "--break") {
-          CmdArgs.push_back("-target-feature");
-          CmdArgs.push_back("-use-tcc-in-div");
-          continue;
-        }
-        if (Value.startswith("-msoft-float")) {
-          CmdArgs.push_back("-target-feature");
-          CmdArgs.push_back("+soft-float");
-          continue;
-        }
-        if (Value.startswith("-mhard-float")) {
-          CmdArgs.push_back("-target-feature");
-          CmdArgs.push_back("-soft-float");
-          continue;
-        }
-
-        MipsTargetFeature = llvm::StringSwitch<const char *>(Value)
-                                .Case("-mips1", "+mips1")
-                                .Case("-mips2", "+mips2")
-                                .Case("-mips3", "+mips3")
-                                .Case("-mips4", "+mips4")
-                                .Case("-mips5", "+mips5")
-                                .Case("-mips32", "+mips32")
-                                .Case("-mips32r2", "+mips32r2")
-                                .Case("-mips32r3", "+mips32r3")
-                                .Case("-mips32r5", "+mips32r5")
-                                .Case("-mips32r6", "+mips32r6")
-                                .Case("-mips64", "+mips64")
-                                .Case("-mips64r2", "+mips64r2")
-                                .Case("-mips64r3", "+mips64r3")
-                                .Case("-mips64r5", "+mips64r5")
-                                .Case("-mips64r6", "+mips64r6")
-                                .Default(nullptr);
-        if (MipsTargetFeature)
-          continue;
-      }
-
-      if (Value == "-force_cpusubtype_ALL") {
-        // Do nothing, this is the default and we don't support anything else.
-      } else if (Value == "-L") {
-        CmdArgs.push_back("-msave-temp-labels");
-      } else if (Value == "--fatal-warnings") {
-        CmdArgs.push_back("-massembler-fatal-warnings");
-      } else if (Value == "--noexecstack") {
-        CmdArgs.push_back("-mnoexecstack");
-      } else if (Value == "-compress-debug-sections" ||
-                 Value == "--compress-debug-sections") {
-        CompressDebugSections = true;
-      } else if (Value == "-nocompress-debug-sections" ||
-                 Value == "--nocompress-debug-sections") {
-        CompressDebugSections = false;
-      } else if (Value == "-mrelax-relocations=yes" ||
-                 Value == "--mrelax-relocations=yes") {
-        UseRelaxRelocations = true;
-      } else if (Value == "-mrelax-relocations=no" ||
-                 Value == "--mrelax-relocations=no") {
-        UseRelaxRelocations = false;
-      } else if (Value.startswith("-I")) {
-        CmdArgs.push_back(Value.data());
-        // We need to consume the next argument if the current arg is a plain
-        // -I. The next arg will be the include directory.
-        if (Value == "-I")
-          TakeNextArg = true;
-      } else if (Value.startswith("-gdwarf-")) {
-        // "-gdwarf-N" options are not cc1as options.
-        unsigned DwarfVersion = DwarfVersionNum(Value);
-        if (DwarfVersion == 0) { // Send it onward, and let cc1as complain.
-          CmdArgs.push_back(Value.data());
-        } else {
-          RenderDebugEnablingArgs(Args, CmdArgs,
-                                  codegenoptions::LimitedDebugInfo,
-                                  DwarfVersion, llvm::DebuggerKind::Default);
-        }
-      } else if (Value.startswith("-mcpu") || Value.startswith("-mfpu") ||
-                 Value.startswith("-mhwdiv") || Value.startswith("-march")) {
-        // Do nothing, we'll validate it later.
- } else if (Value == "-defsym") { - if (A->getNumValues() != 2) { - D.Diag(diag::err_drv_defsym_invalid_format) << Value; - break; - } - const char *S = A->getValue(1); - auto Pair = StringRef(S).split('='); - auto Sym = Pair.first; - auto SVal = Pair.second; - - if (Sym.empty() || SVal.empty()) { - D.Diag(diag::err_drv_defsym_invalid_format) << S; - break; - } - int64_t IVal; - if (SVal.getAsInteger(0, IVal)) { - D.Diag(diag::err_drv_defsym_invalid_symval) << SVal; - break; - } - CmdArgs.push_back(Value.data()); - TakeNextArg = true; - } else { - D.Diag(diag::err_drv_unsupported_option_argument) - << A->getOption().getName() << Value; - } - } - } - if (CompressDebugSections) { - if (llvm::zlib::isAvailable()) - CmdArgs.push_back("-compress-debug-sections"); - else - D.Diag(diag::warn_debug_compression_unavailable); - } - if (UseRelaxRelocations) - CmdArgs.push_back("--mrelax-relocations"); - if (MipsTargetFeature != nullptr) { - CmdArgs.push_back("-target-feature"); - CmdArgs.push_back(MipsTargetFeature); - } -} - -// This adds the static libclang_rt.builtins-arch.a directly to the command line -// FIXME: Make sure we can also emit shared objects if they're requested -// and available, check for possible errors, etc. -static void addClangRT(const ToolChain &TC, const ArgList &Args, - ArgStringList &CmdArgs) { - CmdArgs.push_back(TC.getCompilerRTArgString(Args, "builtins")); -} - -static void addOpenMPRuntime(ArgStringList &CmdArgs, const ToolChain &TC, - const ArgList &Args) { - if (!Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, - options::OPT_fno_openmp, false)) - return; - - switch (TC.getDriver().getOpenMPRuntime(Args)) { - case Driver::OMPRT_OMP: - CmdArgs.push_back("-lomp"); - break; - case Driver::OMPRT_GOMP: - CmdArgs.push_back("-lgomp"); - break; - case Driver::OMPRT_IOMP5: - CmdArgs.push_back("-liomp5"); - break; - case Driver::OMPRT_Unknown: - // Already diagnosed. - break; - } -} - -static void addSanitizerRuntime(const ToolChain &TC, const ArgList &Args, - ArgStringList &CmdArgs, StringRef Sanitizer, - bool IsShared, bool IsWhole) { - // Wrap any static runtimes that must be forced into executable in - // whole-archive. - if (IsWhole) CmdArgs.push_back("-whole-archive"); - CmdArgs.push_back(TC.getCompilerRTArgString(Args, Sanitizer, IsShared)); - if (IsWhole) CmdArgs.push_back("-no-whole-archive"); -} - -// Tries to use a file with the list of dynamic symbols that need to be exported -// from the runtime library. Returns true if the file was found. -static bool addSanitizerDynamicList(const ToolChain &TC, const ArgList &Args, - ArgStringList &CmdArgs, - StringRef Sanitizer) { - SmallString<128> SanRT(TC.getCompilerRT(Args, Sanitizer)); - if (llvm::sys::fs::exists(SanRT + ".syms")) { - CmdArgs.push_back(Args.MakeArgString("--dynamic-list=" + SanRT + ".syms")); - return true; - } - return false; -} - -static void linkSanitizerRuntimeDeps(const ToolChain &TC, - ArgStringList &CmdArgs) { - // Force linking against the system libraries sanitizers depends on - // (see PR15823 why this is necessary). - CmdArgs.push_back("--no-as-needed"); - // There's no libpthread or librt on RTEMS. - if (TC.getTriple().getOS() != llvm::Triple::RTEMS) { - CmdArgs.push_back("-lpthread"); - CmdArgs.push_back("-lrt"); - } - CmdArgs.push_back("-lm"); - // There's no libdl on FreeBSD or RTEMS. 
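
// Aside: the -Wa,-defsym handling above requires the following value to have
// the form "name=integer". The validation in isolation (standalone sketch;
// base 0 mirrors getAsInteger(0, ...) in accepting decimal, hex, and octal):
#include <cstdlib>
#include <string>

static bool isValidDefsym(const std::string &S) {
  std::string::size_type Eq = S.find('=');
  if (Eq == std::string::npos || Eq == 0 || Eq + 1 == S.size())
    return false; // need a non-empty name and a non-empty value
  const std::string Val = S.substr(Eq + 1);
  char *End = nullptr;
  (void)std::strtoll(Val.c_str(), &End, 0);
  return End && *End == '\0'; // the whole value must parse as an integer
}
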
-  if (TC.getTriple().getOS() != llvm::Triple::FreeBSD &&
-      TC.getTriple().getOS() != llvm::Triple::RTEMS)
-    CmdArgs.push_back("-ldl");
-}
-
-static void
-collectSanitizerRuntimes(const ToolChain &TC, const ArgList &Args,
-                         SmallVectorImpl<StringRef> &SharedRuntimes,
-                         SmallVectorImpl<StringRef> &StaticRuntimes,
-                         SmallVectorImpl<StringRef> &NonWholeStaticRuntimes,
-                         SmallVectorImpl<StringRef> &HelperStaticRuntimes,
-                         SmallVectorImpl<StringRef> &RequiredSymbols) {
-  const SanitizerArgs &SanArgs = TC.getSanitizerArgs();
-  // Collect shared runtimes.
-  if (SanArgs.needsAsanRt() && SanArgs.needsSharedAsanRt()) {
-    SharedRuntimes.push_back("asan");
-  }
-  // The stats_client library is also statically linked into DSOs.
-  if (SanArgs.needsStatsRt())
-    StaticRuntimes.push_back("stats_client");
-
-  // Collect static runtimes.
-  if (Args.hasArg(options::OPT_shared) || TC.getTriple().isAndroid()) {
-    // Don't link static runtimes into DSOs or if compiling for Android.
-    return;
-  }
-  if (SanArgs.needsAsanRt()) {
-    if (SanArgs.needsSharedAsanRt()) {
-      HelperStaticRuntimes.push_back("asan-preinit");
-    } else {
-      StaticRuntimes.push_back("asan");
-      if (SanArgs.linkCXXRuntimes())
-        StaticRuntimes.push_back("asan_cxx");
-    }
-  }
-  if (SanArgs.needsDfsanRt())
-    StaticRuntimes.push_back("dfsan");
-  if (SanArgs.needsLsanRt())
-    StaticRuntimes.push_back("lsan");
-  if (SanArgs.needsMsanRt()) {
-    StaticRuntimes.push_back("msan");
-    if (SanArgs.linkCXXRuntimes())
-      StaticRuntimes.push_back("msan_cxx");
-  }
-  if (SanArgs.needsTsanRt()) {
-    StaticRuntimes.push_back("tsan");
-    if (SanArgs.linkCXXRuntimes())
-      StaticRuntimes.push_back("tsan_cxx");
-  }
-  if (SanArgs.needsUbsanRt()) {
-    StaticRuntimes.push_back("ubsan_standalone");
-    if (SanArgs.linkCXXRuntimes())
-      StaticRuntimes.push_back("ubsan_standalone_cxx");
-  }
-  if (SanArgs.needsSafeStackRt())
-    StaticRuntimes.push_back("safestack");
-  if (SanArgs.needsCfiRt())
-    StaticRuntimes.push_back("cfi");
-  if (SanArgs.needsCfiDiagRt()) {
-    StaticRuntimes.push_back("cfi_diag");
-    if (SanArgs.linkCXXRuntimes())
-      StaticRuntimes.push_back("ubsan_standalone_cxx");
-  }
-  if (SanArgs.needsStatsRt()) {
-    NonWholeStaticRuntimes.push_back("stats");
-    RequiredSymbols.push_back("__sanitizer_stats_register");
-  }
-  if (SanArgs.needsEsanRt())
-    StaticRuntimes.push_back("esan");
-}
-
-// Should be called before we add system libraries (C++ ABI, libstdc++/libc++,
-// C runtime, etc). Returns true if sanitizer system deps need to be linked in.
-static bool addSanitizerRuntimes(const ToolChain &TC, const ArgList &Args,
-                                 ArgStringList &CmdArgs) {
-  SmallVector<StringRef, 4> SharedRuntimes, StaticRuntimes,
-      NonWholeStaticRuntimes, HelperStaticRuntimes, RequiredSymbols;
-  collectSanitizerRuntimes(TC, Args, SharedRuntimes, StaticRuntimes,
-                           NonWholeStaticRuntimes, HelperStaticRuntimes,
-                           RequiredSymbols);
-  for (auto RT : SharedRuntimes)
-    addSanitizerRuntime(TC, Args, CmdArgs, RT, true, false);
-  for (auto RT : HelperStaticRuntimes)
-    addSanitizerRuntime(TC, Args, CmdArgs, RT, false, true);
-  bool AddExportDynamic = false;
-  for (auto RT : StaticRuntimes) {
-    addSanitizerRuntime(TC, Args, CmdArgs, RT, false, true);
-    AddExportDynamic |= !addSanitizerDynamicList(TC, Args, CmdArgs, RT);
-  }
-  for (auto RT : NonWholeStaticRuntimes) {
-    addSanitizerRuntime(TC, Args, CmdArgs, RT, false, false);
-    AddExportDynamic |= !addSanitizerDynamicList(TC, Args, CmdArgs, RT);
-  }
-  for (auto S : RequiredSymbols) {
-    CmdArgs.push_back("-u");
-    CmdArgs.push_back(Args.MakeArgString(S));
-  }
-  // If there is a static runtime with no dynamic list, force all the symbols
-  // to be dynamic to be sure we export sanitizer interface functions.
-  if (AddExportDynamic)
-    CmdArgs.push_back("-export-dynamic");
-
-  const SanitizerArgs &SanArgs = TC.getSanitizerArgs();
-  if (SanArgs.hasCrossDsoCfi() && !AddExportDynamic)
-    CmdArgs.push_back("-export-dynamic-symbol=__cfi_check");
-
-  return !StaticRuntimes.empty();
-}
-
-static bool addXRayRuntime(const ToolChain &TC, const ArgList &Args,
-                           ArgStringList &CmdArgs) {
-  if (Args.hasFlag(options::OPT_fxray_instrument,
-                   options::OPT_fnoxray_instrument, false)) {
-    CmdArgs.push_back("-whole-archive");
-    CmdArgs.push_back(TC.getCompilerRTArgString(Args, "xray", false));
-    CmdArgs.push_back("-no-whole-archive");
-    return true;
-  }
-  return false;
-}
-
-static void linkXRayRuntimeDeps(const ToolChain &TC, const ArgList &Args,
-                                ArgStringList &CmdArgs) {
-  CmdArgs.push_back("--no-as-needed");
-  CmdArgs.push_back("-lpthread");
-  CmdArgs.push_back("-lrt");
-  CmdArgs.push_back("-lm");
-  CmdArgs.push_back("-latomic");
-  if (TC.GetCXXStdlibType(Args) == ToolChain::CST_Libcxx)
-    CmdArgs.push_back("-lc++");
-  else
-    CmdArgs.push_back("-lstdc++");
-  if (TC.getTriple().getOS() != llvm::Triple::FreeBSD)
-    CmdArgs.push_back("-ldl");
-}
-
-static bool areOptimizationsEnabled(const ArgList &Args) {
-  // Find the last -O arg and see if it is non-zero.
-  if (Arg *A = Args.getLastArg(options::OPT_O_Group))
-    return !A->getOption().matches(options::OPT_O0);
-  // Defaults to -O0.
-  return false;
-}
-
-static bool mustUseFramePointerForTarget(const llvm::Triple &Triple) {
-  switch (Triple.getArch()) {
-  default:
-    return false;
-  case llvm::Triple::arm:
-  case llvm::Triple::thumb:
-    // ARM Darwin targets require a frame pointer to be always present to aid
-    // offline debugging via backtraces.
-    return Triple.isOSDarwin();
-  }
-}
-
-static bool useFramePointerForTargetByDefault(const ArgList &Args,
-                                              const llvm::Triple &Triple) {
-  switch (Triple.getArch()) {
-  case llvm::Triple::xcore:
-  case llvm::Triple::wasm32:
-  case llvm::Triple::wasm64:
-    // XCore never wants frame pointers, regardless of OS.
-    // WebAssembly never wants frame pointers.
-    return false;
-  default:
-    break;
-  }
-
-  if (Triple.isOSLinux() || Triple.getOS() == llvm::Triple::CloudABI) {
-    switch (Triple.getArch()) {
-    // Don't use a frame pointer on linux if optimizing for certain targets.
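
// Aside: addSanitizerRuntime above brackets forced static runtimes in
// -whole-archive so the linker keeps every member (sanitizer interceptors are
// referenced only at run time). The flag sandwich in isolation (illustrative):
#include <string>
#include <vector>

static void addWholeArchiveLib(std::vector<std::string> &CmdArgs,
                               const std::string &LibPath, bool IsWhole) {
  if (IsWhole)
    CmdArgs.push_back("-whole-archive");
  CmdArgs.push_back(LibPath);
  if (IsWhole)
    CmdArgs.push_back("-no-whole-archive");
}
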
-    case llvm::Triple::mips64:
-    case llvm::Triple::mips64el:
-    case llvm::Triple::mips:
-    case llvm::Triple::mipsel:
-    case llvm::Triple::systemz:
-    case llvm::Triple::x86:
-    case llvm::Triple::x86_64:
-      return !areOptimizationsEnabled(Args);
-    default:
-      return true;
-    }
-  }
-
-  if (Triple.isOSWindows()) {
-    switch (Triple.getArch()) {
-    case llvm::Triple::x86:
-      return !areOptimizationsEnabled(Args);
-    case llvm::Triple::x86_64:
-      return Triple.isOSBinFormatMachO();
-    case llvm::Triple::arm:
-    case llvm::Triple::thumb:
-      // Windows on ARM builds with FPO disabled to aid fast stack walking.
-      return true;
-    default:
-      // All other supported Windows ISAs use xdata unwind information, so
-      // frame pointers are not generally useful.
-      return false;
-    }
-  }
-
-  return true;
-}
-
-static bool shouldUseFramePointer(const ArgList &Args,
-                                  const llvm::Triple &Triple) {
-  if (Arg *A = Args.getLastArg(options::OPT_fno_omit_frame_pointer,
-                               options::OPT_fomit_frame_pointer))
-    return A->getOption().matches(options::OPT_fno_omit_frame_pointer) ||
-           mustUseFramePointerForTarget(Triple);
-
-  if (Args.hasArg(options::OPT_pg))
-    return true;
-
-  return useFramePointerForTargetByDefault(Args, Triple);
-}
-
-static bool shouldUseLeafFramePointer(const ArgList &Args,
-                                      const llvm::Triple &Triple) {
-  if (Arg *A = Args.getLastArg(options::OPT_mno_omit_leaf_frame_pointer,
-                               options::OPT_momit_leaf_frame_pointer))
-    return A->getOption().matches(options::OPT_mno_omit_leaf_frame_pointer) ||
-           mustUseFramePointerForTarget(Triple);
-
-  if (Args.hasArg(options::OPT_pg))
-    return true;
-
-  if (Triple.isPS4CPU())
-    return false;
-
-  return useFramePointerForTargetByDefault(Args, Triple);
-}
-
-/// Add a CC1 option to specify the debug compilation directory.
-static void addDebugCompDirArg(const ArgList &Args, ArgStringList &CmdArgs) {
-  SmallString<128> cwd;
-  if (!llvm::sys::fs::current_path(cwd)) {
-    CmdArgs.push_back("-fdebug-compilation-dir");
-    CmdArgs.push_back(Args.MakeArgString(cwd));
-  }
-}
-
-static const char *SplitDebugName(const ArgList &Args, const InputInfo &Input) {
-  Arg *FinalOutput = Args.getLastArg(options::OPT_o);
-  if (FinalOutput && Args.hasArg(options::OPT_c)) {
-    SmallString<128> T(FinalOutput->getValue());
-    llvm::sys::path::replace_extension(T, "dwo");
-    return Args.MakeArgString(T);
-  } else {
-    // Use the compilation dir.
-    SmallString<128> T(
-        Args.getLastArgValue(options::OPT_fdebug_compilation_dir));
-    SmallString<128> F(llvm::sys::path::stem(Input.getBaseInput()));
-    llvm::sys::path::replace_extension(F, "dwo");
-    T += F;
-    return Args.MakeArgString(F);
-  }
-}
-
-static void SplitDebugInfo(const ToolChain &TC, Compilation &C, const Tool &T,
-                           const JobAction &JA, const ArgList &Args,
-                           const InputInfo &Output, const char *OutFile) {
-  ArgStringList ExtractArgs;
-  ExtractArgs.push_back("--extract-dwo");
-
-  ArgStringList StripArgs;
-  StripArgs.push_back("--strip-dwo");
-
-  // Grab the output of the earlier compile step.
-  StripArgs.push_back(Output.getFilename());
-  ExtractArgs.push_back(Output.getFilename());
-  ExtractArgs.push_back(OutFile);
-
-  const char *Exec = Args.MakeArgString(TC.GetProgramPath("objcopy"));
-  InputInfo II(types::TY_Object, Output.getFilename(), Output.getFilename());
-
-  // First extract the dwo sections.
-  C.addCommand(llvm::make_unique<Command>(JA, T, Exec, ExtractArgs, II));
-
-  // Then remove them from the original .o file.
- C.addCommand(llvm::make_unique<Command>(JA, T, Exec, StripArgs, II)); -} - -/// \brief Vectorize at all optimization levels greater than 1 except for -Oz. -/// For -Oz the loop vectorizer is disabled, while the slp vectorizer is enabled. -static bool shouldEnableVectorizerAtOLevel(const ArgList &Args, bool isSlpVec) { - if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { - if (A->getOption().matches(options::OPT_O4) || - A->getOption().matches(options::OPT_Ofast)) - return true; - - if (A->getOption().matches(options::OPT_O0)) - return false; - - assert(A->getOption().matches(options::OPT_O) && "Must have a -O flag"); - - // Vectorize -Os. - StringRef S(A->getValue()); - if (S == "s") - return true; - - // Don't vectorize -Oz, unless it's the slp vectorizer. - if (S == "z") - return isSlpVec; - - unsigned OptLevel = 0; - if (S.getAsInteger(10, OptLevel)) - return false; - - return OptLevel > 1; - } - - return false; -} - -/// Add -x lang to \p CmdArgs for \p Input. -static void addDashXForInput(const ArgList &Args, const InputInfo &Input, - ArgStringList &CmdArgs) { - // When using -verify-pch, we don't want to provide the type - // 'precompiled-header' if it was inferred from the file extension - if (Args.hasArg(options::OPT_verify_pch) && Input.getType() == types::TY_PCH) - return; - - CmdArgs.push_back("-x"); - if (Args.hasArg(options::OPT_rewrite_objc)) - CmdArgs.push_back(types::getTypeName(types::TY_PP_ObjCXX)); - else - CmdArgs.push_back(types::getTypeName(Input.getType())); -} - -// Claim options we don't want to warn about if they are unused. We do this for -// options that build systems might add but are unused when assembling or only -// running the preprocessor for example. -static void claimNoWarnArgs(const ArgList &Args) { - // Don't warn about unused -f(no-)?lto. This can happen when we're - // preprocessing, precompiling or assembling. - Args.ClaimAllArgs(options::OPT_flto_EQ); - Args.ClaimAllArgs(options::OPT_flto); - Args.ClaimAllArgs(options::OPT_fno_lto); -} - -static void appendUserToPath(SmallVectorImpl<char> &Result) { -#ifdef LLVM_ON_UNIX - const char *Username = getenv("LOGNAME"); -#else - const char *Username = getenv("USERNAME"); -#endif - if (Username) { - // Validate that LoginName can be used in a path, and get its length. - size_t Len = 0; - for (const char *P = Username; *P; ++P, ++Len) { - if (!isAlphanumeric(*P) && *P != '_') { - Username = nullptr; - break; - } - } - - if (Username && Len > 0) { - Result.append(Username, Username + Len); - return; - } - } - -// Fallback to user id. -#ifdef LLVM_ON_UNIX - std::string UID = llvm::utostr(getuid()); -#else - // FIXME: Windows seems to have an 'SID' that might work. 
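- // (Editor's note, not in the original file: the "9999" below is a fixed - // placeholder uid for non-Unix hosts; Unix hosts use the real getuid() above.)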
- std::string UID = "9999"; -#endif - Result.append(UID.begin(), UID.end()); -} - -static Arg *getLastProfileUseArg(const ArgList &Args) { - auto *ProfileUseArg = Args.getLastArg( - options::OPT_fprofile_instr_use, options::OPT_fprofile_instr_use_EQ, - options::OPT_fprofile_use, options::OPT_fprofile_use_EQ, - options::OPT_fno_profile_instr_use); - - if (ProfileUseArg && - ProfileUseArg->getOption().matches(options::OPT_fno_profile_instr_use)) - ProfileUseArg = nullptr; - - return ProfileUseArg; -} - -static void addPGOAndCoverageFlags(Compilation &C, const Driver &D, - const InputInfo &Output, const ArgList &Args, - ArgStringList &CmdArgs) { - - auto *PGOGenerateArg = Args.getLastArg(options::OPT_fprofile_generate, - options::OPT_fprofile_generate_EQ, - options::OPT_fno_profile_generate); - if (PGOGenerateArg && - PGOGenerateArg->getOption().matches(options::OPT_fno_profile_generate)) - PGOGenerateArg = nullptr; - - auto *ProfileGenerateArg = Args.getLastArg( - options::OPT_fprofile_instr_generate, - options::OPT_fprofile_instr_generate_EQ, - options::OPT_fno_profile_instr_generate); - if (ProfileGenerateArg && - ProfileGenerateArg->getOption().matches( - options::OPT_fno_profile_instr_generate)) - ProfileGenerateArg = nullptr; - - if (PGOGenerateArg && ProfileGenerateArg) - D.Diag(diag::err_drv_argument_not_allowed_with) - << PGOGenerateArg->getSpelling() << ProfileGenerateArg->getSpelling(); - - auto *ProfileUseArg = getLastProfileUseArg(Args); - - if (PGOGenerateArg && ProfileUseArg) - D.Diag(diag::err_drv_argument_not_allowed_with) - << ProfileUseArg->getSpelling() << PGOGenerateArg->getSpelling(); - - if (ProfileGenerateArg && ProfileUseArg) - D.Diag(diag::err_drv_argument_not_allowed_with) - << ProfileGenerateArg->getSpelling() << ProfileUseArg->getSpelling(); - - if (ProfileGenerateArg) { - if (ProfileGenerateArg->getOption().matches( - options::OPT_fprofile_instr_generate_EQ)) - CmdArgs.push_back(Args.MakeArgString(Twine("-fprofile-instrument-path=") + - ProfileGenerateArg->getValue())); - // The default is to use Clang Instrumentation. - CmdArgs.push_back("-fprofile-instrument=clang"); - } - - if (PGOGenerateArg) { - CmdArgs.push_back("-fprofile-instrument=llvm"); - if (PGOGenerateArg->getOption().matches( - options::OPT_fprofile_generate_EQ)) { - SmallString<128> Path(PGOGenerateArg->getValue()); - llvm::sys::path::append(Path, "default_%m.profraw"); - CmdArgs.push_back( - Args.MakeArgString(Twine("-fprofile-instrument-path=") + Path)); - } - } - - if (ProfileUseArg) { - if (ProfileUseArg->getOption().matches(options::OPT_fprofile_instr_use_EQ)) - CmdArgs.push_back(Args.MakeArgString( - Twine("-fprofile-instrument-use-path=") + ProfileUseArg->getValue())); - else if ((ProfileUseArg->getOption().matches( - options::OPT_fprofile_use_EQ) || - ProfileUseArg->getOption().matches( - options::OPT_fprofile_instr_use))) { - SmallString<128> Path( - ProfileUseArg->getNumValues() == 0 ? 
"" : ProfileUseArg->getValue()); - if (Path.empty() || llvm::sys::fs::is_directory(Path)) - llvm::sys::path::append(Path, "default.profdata"); - CmdArgs.push_back( - Args.MakeArgString(Twine("-fprofile-instrument-use-path=") + Path)); - } - } - - if (Args.hasArg(options::OPT_ftest_coverage) || - Args.hasArg(options::OPT_coverage)) - CmdArgs.push_back("-femit-coverage-notes"); - if (Args.hasFlag(options::OPT_fprofile_arcs, options::OPT_fno_profile_arcs, - false) || - Args.hasArg(options::OPT_coverage)) - CmdArgs.push_back("-femit-coverage-data"); - - if (Args.hasFlag(options::OPT_fcoverage_mapping, - options::OPT_fno_coverage_mapping, false) && - !ProfileGenerateArg) - D.Diag(diag::err_drv_argument_only_allowed_with) - << "-fcoverage-mapping" - << "-fprofile-instr-generate"; - - if (Args.hasFlag(options::OPT_fcoverage_mapping, - options::OPT_fno_coverage_mapping, false)) - CmdArgs.push_back("-fcoverage-mapping"); - - if (C.getArgs().hasArg(options::OPT_c) || - C.getArgs().hasArg(options::OPT_S)) { - if (Output.isFilename()) { - CmdArgs.push_back("-coverage-notes-file"); - SmallString<128> OutputFilename; - if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o)) - OutputFilename = FinalOutput->getValue(); - else - OutputFilename = llvm::sys::path::filename(Output.getBaseInput()); - SmallString<128> CoverageFilename = OutputFilename; - if (llvm::sys::path::is_relative(CoverageFilename)) { - SmallString<128> Pwd; - if (!llvm::sys::fs::current_path(Pwd)) { - llvm::sys::path::append(Pwd, CoverageFilename); - CoverageFilename.swap(Pwd); - } - } - llvm::sys::path::replace_extension(CoverageFilename, "gcno"); - CmdArgs.push_back(Args.MakeArgString(CoverageFilename)); - - // Leave -fprofile-dir= an unused argument unless .gcda emission is - // enabled. To be polite, with '-fprofile-arcs -fno-profile-arcs' consider - // the flag used. There is no -fno-profile-dir, so the user has no - // targeted way to suppress the warning. - if (Args.hasArg(options::OPT_fprofile_arcs) || - Args.hasArg(options::OPT_coverage)) { - CmdArgs.push_back("-coverage-data-file"); - if (Arg *FProfileDir = Args.getLastArg(options::OPT_fprofile_dir)) { - CoverageFilename = FProfileDir->getValue(); - llvm::sys::path::append(CoverageFilename, OutputFilename); - } - llvm::sys::path::replace_extension(CoverageFilename, "gcda"); - CmdArgs.push_back(Args.MakeArgString(CoverageFilename)); - } - } - } -} - -static void addPS4ProfileRTArgs(const ToolChain &TC, const ArgList &Args, - ArgStringList &CmdArgs) { - if ((Args.hasFlag(options::OPT_fprofile_arcs, options::OPT_fno_profile_arcs, - false) || - Args.hasFlag(options::OPT_fprofile_generate, - options::OPT_fno_profile_instr_generate, false) || - Args.hasFlag(options::OPT_fprofile_generate_EQ, - options::OPT_fno_profile_instr_generate, false) || - Args.hasFlag(options::OPT_fprofile_instr_generate, - options::OPT_fno_profile_instr_generate, false) || - Args.hasFlag(options::OPT_fprofile_instr_generate_EQ, - options::OPT_fno_profile_instr_generate, false) || - Args.hasArg(options::OPT_fcreate_profile) || - Args.hasArg(options::OPT_coverage))) - CmdArgs.push_back("--dependent-lib=libclang_rt.profile-x86_64.a"); -} - -/// Parses the various -fpic/-fPIC/-fpie/-fPIE arguments. Then, -/// smooshes them together with platform defaults, to decide whether -/// this compile should be using PIC mode or not. Returns a tuple of -/// (RelocationModel, PICLevel, IsPIE). 
-static std::tuple<llvm::Reloc::Model, unsigned, bool> -ParsePICArgs(const ToolChain &ToolChain, const llvm::Triple &Triple, - const ArgList &Args) { - // FIXME: why does this code...and so much everywhere else, use both - // ToolChain.getTriple() and Triple? - bool PIE = ToolChain.isPIEDefault(); - bool PIC = PIE || ToolChain.isPICDefault(); - // The Darwin/MachO default to use PIC does not apply when using -static. - if (ToolChain.getTriple().isOSBinFormatMachO() && - Args.hasArg(options::OPT_static)) - PIE = PIC = false; - bool IsPICLevelTwo = PIC; - - bool KernelOrKext = - Args.hasArg(options::OPT_mkernel, options::OPT_fapple_kext); - - // Android-specific defaults for PIC/PIE - if (ToolChain.getTriple().isAndroid()) { - switch (ToolChain.getArch()) { - case llvm::Triple::arm: - case llvm::Triple::armeb: - case llvm::Triple::thumb: - case llvm::Triple::thumbeb: - case llvm::Triple::aarch64: - case llvm::Triple::mips: - case llvm::Triple::mipsel: - case llvm::Triple::mips64: - case llvm::Triple::mips64el: - PIC = true; // "-fpic" - break; - - case llvm::Triple::x86: - case llvm::Triple::x86_64: - PIC = true; // "-fPIC" - IsPICLevelTwo = true; - break; - - default: - break; - } - } - - // OpenBSD-specific defaults for PIE - if (ToolChain.getTriple().getOS() == llvm::Triple::OpenBSD) { - switch (ToolChain.getArch()) { - case llvm::Triple::mips64: - case llvm::Triple::mips64el: - case llvm::Triple::sparcel: - case llvm::Triple::x86: - case llvm::Triple::x86_64: - IsPICLevelTwo = false; // "-fpie" - break; - - case llvm::Triple::ppc: - case llvm::Triple::sparc: - case llvm::Triple::sparcv9: - IsPICLevelTwo = true; // "-fPIE" - break; - - default: - break; - } - } - - // The last argument relating to either PIC or PIE wins, and no - // other argument is used. If the last argument is any flavor of the - // '-fno-...' arguments, both PIC and PIE are disabled. Any PIE - // option implicitly enables PIC at the same level. - Arg *LastPICArg = Args.getLastArg(options::OPT_fPIC, options::OPT_fno_PIC, - options::OPT_fpic, options::OPT_fno_pic, - options::OPT_fPIE, options::OPT_fno_PIE, - options::OPT_fpie, options::OPT_fno_pie); - // Check whether the tool chain trumps the PIC-ness decision. If the PIC-ness - // is forced, then neither the PIC nor the PIE flags will have any effect. - if (!ToolChain.isPICDefaultForced()) { - if (LastPICArg) { - Option O = LastPICArg->getOption(); - if (O.matches(options::OPT_fPIC) || O.matches(options::OPT_fpic) || - O.matches(options::OPT_fPIE) || O.matches(options::OPT_fpie)) { - PIE = O.matches(options::OPT_fPIE) || O.matches(options::OPT_fpie); - PIC = - PIE || O.matches(options::OPT_fPIC) || O.matches(options::OPT_fpic); - IsPICLevelTwo = - O.matches(options::OPT_fPIE) || O.matches(options::OPT_fPIC); - } else { - PIE = PIC = false; - if (Triple.isPS4CPU()) { - Arg *ModelArg = Args.getLastArg(options::OPT_mcmodel_EQ); - StringRef Model = ModelArg ? ModelArg->getValue() : ""; - if (Model != "kernel") { - PIC = true; - ToolChain.getDriver().Diag(diag::warn_drv_ps4_force_pic) - << LastPICArg->getSpelling(); - } - } - } - } - } - - // Introduce a Darwin and PS4-specific hack. If the default is PIC, but the - // PIC level would've been set to level 1, force it back to level 2 PIC - // instead. - if (PIC && (ToolChain.getTriple().isOSDarwin() || Triple.isPS4CPU())) - IsPICLevelTwo |= ToolChain.isPICDefault(); - - // These kernel flags are a trump card: they will disable PIC/PIE - // generation, independent of the argument order. 
- if (KernelOrKext && ((!Triple.isiOS() || Triple.isOSVersionLT(6)) && - !Triple.isWatchOS())) - PIC = PIE = false; - - if (Arg *A = Args.getLastArg(options::OPT_mdynamic_no_pic)) { - // This is a very special mode. It trumps the other modes, almost no one - // uses it, and it isn't even valid on any OS but Darwin. - if (!ToolChain.getTriple().isOSDarwin()) - ToolChain.getDriver().Diag(diag::err_drv_unsupported_opt_for_target) - << A->getSpelling() << ToolChain.getTriple().str(); - - // FIXME: Warn when this flag trumps some other PIC or PIE flag. - - // Only a forced PIC mode can cause the actual compile to have PIC defines - // etc., no flags are sufficient. This behavior was selected to closely - // match that of llvm-gcc and Apple GCC before that. - PIC = ToolChain.isPICDefault() && ToolChain.isPICDefaultForced(); - - return std::make_tuple(llvm::Reloc::DynamicNoPIC, PIC ? 2U : 0U, false); - } - - bool EmbeddedPISupported; - switch (ToolChain.getArch()) { - case llvm::Triple::arm: - case llvm::Triple::armeb: - case llvm::Triple::thumb: - case llvm::Triple::thumbeb: - EmbeddedPISupported = true; - break; - default: - EmbeddedPISupported = false; - break; - } - - bool ROPI = false, RWPI = false; - Arg *LastROPIArg = Args.getLastArg(options::OPT_fropi, options::OPT_fno_ropi); - if (LastROPIArg && LastROPIArg->getOption().matches(options::OPT_fropi)) { - if (!EmbeddedPISupported) - ToolChain.getDriver().Diag(diag::err_drv_unsupported_opt_for_target) - << LastROPIArg->getSpelling() << ToolChain.getTriple().str(); - ROPI = true; - } - Arg *LastRWPIArg = Args.getLastArg(options::OPT_frwpi, options::OPT_fno_rwpi); - if (LastRWPIArg && LastRWPIArg->getOption().matches(options::OPT_frwpi)) { - if (!EmbeddedPISupported) - ToolChain.getDriver().Diag(diag::err_drv_unsupported_opt_for_target) - << LastRWPIArg->getSpelling() << ToolChain.getTriple().str(); - RWPI = true; - } - - // ROPI and RWPI are not compatible with PIC or PIE. - if ((ROPI || RWPI) && (PIC || PIE)) { - ToolChain.getDriver().Diag(diag::err_drv_ropi_rwpi_incompatible_with_pic); - } - - if (PIC) - return std::make_tuple(llvm::Reloc::PIC_, IsPICLevelTwo ? 2U : 1U, PIE); - - llvm::Reloc::Model RelocM = llvm::Reloc::Static; - if (ROPI && RWPI) - RelocM = llvm::Reloc::ROPI_RWPI; - else if (ROPI) - RelocM = llvm::Reloc::ROPI; - else if (RWPI) - RelocM = llvm::Reloc::RWPI; - - return std::make_tuple(RelocM, 0U, false); -} - -static const char *RelocationModelName(llvm::Reloc::Model Model) { - switch (Model) { - case llvm::Reloc::Static: - return "static"; - case llvm::Reloc::PIC_: - return "pic"; - case llvm::Reloc::DynamicNoPIC: - return "dynamic-no-pic"; - case llvm::Reloc::ROPI: - return "ropi"; - case llvm::Reloc::RWPI: - return "rwpi"; - case llvm::Reloc::ROPI_RWPI: - return "ropi-rwpi"; - } - llvm_unreachable("Unknown Reloc::Model kind"); -} - -static void AddAssemblerKPIC(const ToolChain &ToolChain, const ArgList &Args, - ArgStringList &CmdArgs) { - llvm::Reloc::Model RelocationModel; - unsigned PICLevel; - bool IsPIE; - std::tie(RelocationModel, PICLevel, IsPIE) = - ParsePICArgs(ToolChain, ToolChain.getTriple(), Args); - - if (RelocationModel != llvm::Reloc::Static) - CmdArgs.push_back("-KPIC"); -} - -void Clang::DumpCompilationDatabase(Compilation &C, StringRef Filename, - StringRef Target, const InputInfo &Output, - const InputInfo &Input, const ArgList &Args) const { - // If this is a dry run, do not create the compilation database file. 
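- // (Editor's note, not in the original file: OPT__HASH_HASH_HASH is the - // driver's "-###" flag; "clang -### foo.c" only prints the jobs it would - // run, so no compilation database entry should be written for it.)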
- if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH)) - return; - - using llvm::yaml::escape; - const Driver &D = getToolChain().getDriver(); - - if (!CompilationDatabase) { - std::error_code EC; - auto File = llvm::make_unique<llvm::raw_fd_ostream>(Filename, EC, - llvm::sys::fs::F_Text); - if (EC) { - D.Diag(clang::diag::err_drv_compilationdatabase) << Filename - << EC.message(); - return; - } - CompilationDatabase = std::move(File); - } - auto &CDB = *CompilationDatabase; - SmallString<128> Buf; - if (llvm::sys::fs::current_path(Buf)) - Buf = "."; - CDB << "{ \"directory\": \"" << escape(Buf) << "\""; - CDB << ", \"file\": \"" << escape(Input.getFilename()) << "\""; - CDB << ", \"output\": \"" << escape(Output.getFilename()) << "\""; - CDB << ", \"arguments\": [\"" << escape(D.ClangExecutable) << "\""; - Buf = "-x"; - Buf += types::getTypeName(Input.getType()); - CDB << ", \"" << escape(Buf) << "\""; - if (!D.SysRoot.empty() && !Args.hasArg(options::OPT__sysroot_EQ)) { - Buf = "--sysroot="; - Buf += D.SysRoot; - CDB << ", \"" << escape(Buf) << "\""; - } - CDB << ", \"" << escape(Input.getFilename()) << "\""; - for (auto &A: Args) { - auto &O = A->getOption(); - // Skip language selection, which is positional. - if (O.getID() == options::OPT_x) - continue; - // Skip writing dependency output and the compilation database itself. - if (O.getGroup().isValid() && O.getGroup().getID() == options::OPT_M_Group) - continue; - // Skip inputs. - if (O.getKind() == Option::InputClass) - continue; - // All other arguments are quoted and appended. - ArgStringList ASL; - A->render(Args, ASL); - for (auto &it: ASL) - CDB << ", \"" << escape(it) << "\""; - } - Buf = "--target="; - Buf += Target; - CDB << ", \"" << escape(Buf) << "\"]},\n"; -} - -void Clang::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, const InputInfoList &Inputs, - const ArgList &Args, const char *LinkingOutput) const { - const llvm::Triple &Triple = getToolChain().getEffectiveTriple(); - const std::string &TripleStr = Triple.getTriple(); - - bool KernelOrKext = - Args.hasArg(options::OPT_mkernel, options::OPT_fapple_kext); - const Driver &D = getToolChain().getDriver(); - ArgStringList CmdArgs; - - bool IsWindowsGNU = getToolChain().getTriple().isWindowsGNUEnvironment(); - bool IsWindowsCygnus = - getToolChain().getTriple().isWindowsCygwinEnvironment(); - bool IsWindowsMSVC = getToolChain().getTriple().isWindowsMSVCEnvironment(); - bool IsPS4CPU = getToolChain().getTriple().isPS4CPU(); - bool IsIAMCU = getToolChain().getTriple().isOSIAMCU(); - - // Check number of inputs for sanity. We need at least one input. - assert(Inputs.size() >= 1 && "Must have at least one input."); - const InputInfo &Input = Inputs[0]; - // CUDA compilation may have multiple inputs (source file + results of - // device-side compilations). OpenMP device jobs also take the host IR as a - // second input. All other jobs are expected to have exactly one - // input. - bool IsCuda = JA.isOffloading(Action::OFK_Cuda); - bool IsOpenMPDevice = JA.isDeviceOffloading(Action::OFK_OpenMP); - assert((IsCuda || (IsOpenMPDevice && Inputs.size() == 2) || - Inputs.size() == 1) && - "Unable to handle multiple inputs."); - - // C++ is not supported for IAMCU. - if (IsIAMCU && types::isCXX(Input.getType())) - D.Diag(diag::err_drv_clang_unsupported) << "C++ for IAMCU"; - - // Invoke ourselves in -cc1 mode. - // - // FIXME: Implement custom jobs for internal actions. - CmdArgs.push_back("-cc1"); - - // Add the "effective" target triple. 
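- // (Editor's note, not in the original file: this produces e.g. - //   -cc1 ... -triple x86_64-unknown-linux-gnu ... - // with the triple text taken from getEffectiveTriple() above.)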
- CmdArgs.push_back("-triple"); - CmdArgs.push_back(Args.MakeArgString(TripleStr)); - - if (const Arg *MJ = Args.getLastArg(options::OPT_MJ)) { - DumpCompilationDatabase(C, MJ->getValue(), TripleStr, Output, Input, Args); - Args.ClaimAllArgs(options::OPT_MJ); - } - - if (IsCuda) { - // We have to pass the triple of the host if compiling for a CUDA device and - // vice-versa. - std::string NormalizedTriple; - if (JA.isDeviceOffloading(Action::OFK_Cuda)) - NormalizedTriple = C.getSingleOffloadToolChain() - ->getTriple() - .normalize(); - else - NormalizedTriple = C.getSingleOffloadToolChain() - ->getTriple() - .normalize(); - - CmdArgs.push_back("-aux-triple"); - CmdArgs.push_back(Args.MakeArgString(NormalizedTriple)); - } - - if (Triple.isOSWindows() && (Triple.getArch() == llvm::Triple::arm || - Triple.getArch() == llvm::Triple::thumb)) { - unsigned Offset = Triple.getArch() == llvm::Triple::arm ? 4 : 6; - unsigned Version; - Triple.getArchName().substr(Offset).getAsInteger(10, Version); - if (Version < 7) - D.Diag(diag::err_target_unsupported_arch) << Triple.getArchName() - << TripleStr; - } - - // Push all default warning arguments that are specific to - // the given target. These come before user provided warning options - // are provided. - getToolChain().addClangWarningOptions(CmdArgs); - - // Select the appropriate action. - RewriteKind rewriteKind = RK_None; - - if (isa(JA)) { - assert(JA.getType() == types::TY_Plist && "Invalid output type."); - CmdArgs.push_back("-analyze"); - } else if (isa(JA)) { - CmdArgs.push_back("-migrate"); - } else if (isa(JA)) { - if (Output.getType() == types::TY_Dependencies) - CmdArgs.push_back("-Eonly"); - else { - CmdArgs.push_back("-E"); - if (Args.hasArg(options::OPT_rewrite_objc) && - !Args.hasArg(options::OPT_g_Group)) - CmdArgs.push_back("-P"); - } - } else if (isa(JA)) { - CmdArgs.push_back("-emit-obj"); - - CollectArgsForIntegratedAssembler(C, Args, CmdArgs, D); - - // Also ignore explicit -force_cpusubtype_ALL option. - (void)Args.hasArg(options::OPT_force__cpusubtype__ALL); - } else if (isa(JA)) { - // Use PCH if the user requested it. 
- bool UsePCH = D.CCCUsePCH; - - if (JA.getType() == types::TY_Nothing) - CmdArgs.push_back("-fsyntax-only"); - else if (JA.getType() == types::TY_ModuleFile) - CmdArgs.push_back("-emit-module-interface"); - else if (UsePCH) - CmdArgs.push_back("-emit-pch"); - else - CmdArgs.push_back("-emit-pth"); - } else if (isa<VerifyPCHJobAction>(JA)) { - CmdArgs.push_back("-verify-pch"); - } else { - assert((isa<CompileJobAction>(JA) || isa<BackendJobAction>(JA)) && - "Invalid action for clang tool."); - if (JA.getType() == types::TY_Nothing) { - CmdArgs.push_back("-fsyntax-only"); - } else if (JA.getType() == types::TY_LLVM_IR || - JA.getType() == types::TY_LTO_IR) { - CmdArgs.push_back("-emit-llvm"); - } else if (JA.getType() == types::TY_LLVM_BC || - JA.getType() == types::TY_LTO_BC) { - CmdArgs.push_back("-emit-llvm-bc"); - } else if (JA.getType() == types::TY_PP_Asm) { - CmdArgs.push_back("-S"); - } else if (JA.getType() == types::TY_AST) { - CmdArgs.push_back("-emit-pch"); - } else if (JA.getType() == types::TY_ModuleFile) { - CmdArgs.push_back("-module-file-info"); - } else if (JA.getType() == types::TY_RewrittenObjC) { - CmdArgs.push_back("-rewrite-objc"); - rewriteKind = RK_NonFragile; - } else if (JA.getType() == types::TY_RewrittenLegacyObjC) { - CmdArgs.push_back("-rewrite-objc"); - rewriteKind = RK_Fragile; - } else { - assert(JA.getType() == types::TY_PP_Asm && "Unexpected output type!"); - } - - // Preserve use-list order by default when emitting bitcode, so that - // loading the bitcode up in 'opt' or 'llc' and running passes gives the - // same result as running passes here. For LTO, we don't need to preserve - // the use-list order, since serialization to bitcode is part of the flow. - if (JA.getType() == types::TY_LLVM_BC) - CmdArgs.push_back("-emit-llvm-uselists"); - - if (D.isUsingLTO()) - Args.AddLastArg(CmdArgs, options::OPT_flto, options::OPT_flto_EQ); - } - - if (const Arg *A = Args.getLastArg(options::OPT_fthinlto_index_EQ)) { - if (!types::isLLVMIR(Input.getType())) - D.Diag(diag::err_drv_argument_only_allowed_with) << A->getAsString(Args) - << "-x ir"; - Args.AddLastArg(CmdArgs, options::OPT_fthinlto_index_EQ); - } - - // Embed-bitcode option. - if (C.getDriver().embedBitcodeInObject() && - (isa<BackendJobAction>(JA) || isa<AssembleJobAction>(JA))) { - // Add flags implied by -fembed-bitcode. - Args.AddLastArg(CmdArgs, options::OPT_fembed_bitcode_EQ); - // Disable all llvm IR level optimizations. - CmdArgs.push_back("-disable-llvm-optzns"); - } - if (C.getDriver().embedBitcodeMarkerOnly()) - CmdArgs.push_back("-fembed-bitcode=marker"); - - // We normally speed up the clang process a bit by skipping destructors at - // exit, but when we're generating diagnostics we can rely on some of the - // cleanup. - if (!C.isForDiagnostics()) - CmdArgs.push_back("-disable-free"); - -// Disable the verification pass in -asserts builds. -#ifdef NDEBUG - CmdArgs.push_back("-disable-llvm-verifier"); - // Discard LLVM value names in -asserts builds. - CmdArgs.push_back("-discard-value-names"); -#endif - - // Set the main file name, so that debug info works even with - // -save-temps. - CmdArgs.push_back("-main-file-name"); - CmdArgs.push_back(getBaseInputName(Args, Input)); - - // Some flags which affect the language (via preprocessor - // defines). - if (Args.hasArg(options::OPT_static)) - CmdArgs.push_back("-static-define"); - - if (isa<AnalyzeJobAction>(JA)) { - // Enable region store model by default. - CmdArgs.push_back("-analyzer-store=region"); - - // Treat blocks as analysis entry points. 
- CmdArgs.push_back("-analyzer-opt-analyze-nested-blocks"); - - CmdArgs.push_back("-analyzer-eagerly-assume"); - - // Add default argument set. - if (!Args.hasArg(options::OPT__analyzer_no_default_checks)) { - CmdArgs.push_back("-analyzer-checker=core"); - - if (!IsWindowsMSVC) { - CmdArgs.push_back("-analyzer-checker=unix"); - } else { - // Enable "unix" checkers that also work on Windows. - CmdArgs.push_back("-analyzer-checker=unix.API"); - CmdArgs.push_back("-analyzer-checker=unix.Malloc"); - CmdArgs.push_back("-analyzer-checker=unix.MallocSizeof"); - CmdArgs.push_back("-analyzer-checker=unix.MismatchedDeallocator"); - CmdArgs.push_back("-analyzer-checker=unix.cstring.BadSizeArg"); - CmdArgs.push_back("-analyzer-checker=unix.cstring.NullArg"); - } - - // Disable some unix checkers for PS4. - if (IsPS4CPU) { - CmdArgs.push_back("-analyzer-disable-checker=unix.API"); - CmdArgs.push_back("-analyzer-disable-checker=unix.Vfork"); - } - - if (getToolChain().getTriple().getVendor() == llvm::Triple::Apple) - CmdArgs.push_back("-analyzer-checker=osx"); - - CmdArgs.push_back("-analyzer-checker=deadcode"); - - if (types::isCXX(Input.getType())) - CmdArgs.push_back("-analyzer-checker=cplusplus"); - - if (!IsPS4CPU) { - CmdArgs.push_back( - "-analyzer-checker=security.insecureAPI.UncheckedReturn"); - CmdArgs.push_back("-analyzer-checker=security.insecureAPI.getpw"); - CmdArgs.push_back("-analyzer-checker=security.insecureAPI.gets"); - CmdArgs.push_back("-analyzer-checker=security.insecureAPI.mktemp"); - CmdArgs.push_back("-analyzer-checker=security.insecureAPI.mkstemp"); - CmdArgs.push_back("-analyzer-checker=security.insecureAPI.vfork"); - } - - // Default nullability checks. - CmdArgs.push_back("-analyzer-checker=nullability.NullPassedToNonnull"); - CmdArgs.push_back( - "-analyzer-checker=nullability.NullReturnedFromNonnull"); - } - - // Set the output format. The default is plist, for (lame) historical - // reasons. - CmdArgs.push_back("-analyzer-output"); - if (Arg *A = Args.getLastArg(options::OPT__analyzer_output)) - CmdArgs.push_back(A->getValue()); - else - CmdArgs.push_back("plist"); - - // Disable the presentation of standard compiler warnings when - // using --analyze. We only want to show static analyzer diagnostics - // or frontend errors. - CmdArgs.push_back("-w"); - - // Add -Xanalyzer arguments when running as analyzer. - Args.AddAllArgValues(CmdArgs, options::OPT_Xanalyzer); - } - - CheckCodeGenerationOptions(D, Args); - - llvm::Reloc::Model RelocationModel; - unsigned PICLevel; - bool IsPIE; - std::tie(RelocationModel, PICLevel, IsPIE) = - ParsePICArgs(getToolChain(), Triple, Args); - - const char *RMName = RelocationModelName(RelocationModel); - - if ((RelocationModel == llvm::Reloc::ROPI || - RelocationModel == llvm::Reloc::ROPI_RWPI) && - types::isCXX(Input.getType()) && - !Args.hasArg(options::OPT_fallow_unsupported)) - D.Diag(diag::err_drv_ropi_incompatible_with_cxx); - - if (RMName) { - CmdArgs.push_back("-mrelocation-model"); - CmdArgs.push_back(RMName); - } - if (PICLevel > 0) { - CmdArgs.push_back("-pic-level"); - CmdArgs.push_back(PICLevel == 1 ? 
"1" : "2"); - if (IsPIE) - CmdArgs.push_back("-pic-is-pie"); - } - - if (Arg *A = Args.getLastArg(options::OPT_meabi)) { - CmdArgs.push_back("-meabi"); - CmdArgs.push_back(A->getValue()); - } - - CmdArgs.push_back("-mthread-model"); - if (Arg *A = Args.getLastArg(options::OPT_mthread_model)) - CmdArgs.push_back(A->getValue()); - else - CmdArgs.push_back(Args.MakeArgString(getToolChain().getThreadModel())); - - Args.AddLastArg(CmdArgs, options::OPT_fveclib); - - if (!Args.hasFlag(options::OPT_fmerge_all_constants, - options::OPT_fno_merge_all_constants)) - CmdArgs.push_back("-fno-merge-all-constants"); - - // LLVM Code Generator Options. - - if (Args.hasArg(options::OPT_frewrite_map_file) || - Args.hasArg(options::OPT_frewrite_map_file_EQ)) { - for (const Arg *A : Args.filtered(options::OPT_frewrite_map_file, - options::OPT_frewrite_map_file_EQ)) { - StringRef Map = A->getValue(); - if (!llvm::sys::fs::exists(Map)) { - D.Diag(diag::err_drv_no_such_file) << Map; - } else { - CmdArgs.push_back("-frewrite-map-file"); - CmdArgs.push_back(A->getValue()); - A->claim(); - } - } - } - - if (Arg *A = Args.getLastArg(options::OPT_Wframe_larger_than_EQ)) { - StringRef v = A->getValue(); - CmdArgs.push_back("-mllvm"); - CmdArgs.push_back(Args.MakeArgString("-warn-stack-size=" + v)); - A->claim(); - } - - if (!Args.hasFlag(options::OPT_fjump_tables, options::OPT_fno_jump_tables, - true)) - CmdArgs.push_back("-fno-jump-tables"); - - if (!Args.hasFlag(options::OPT_fpreserve_as_comments, - options::OPT_fno_preserve_as_comments, true)) - CmdArgs.push_back("-fno-preserve-as-comments"); - - if (Arg *A = Args.getLastArg(options::OPT_mregparm_EQ)) { - CmdArgs.push_back("-mregparm"); - CmdArgs.push_back(A->getValue()); - } - - if (Arg *A = Args.getLastArg(options::OPT_fpcc_struct_return, - options::OPT_freg_struct_return)) { - if (getToolChain().getArch() != llvm::Triple::x86) { - D.Diag(diag::err_drv_unsupported_opt_for_target) - << A->getSpelling() << getToolChain().getTriple().str(); - } else if (A->getOption().matches(options::OPT_fpcc_struct_return)) { - CmdArgs.push_back("-fpcc-struct-return"); - } else { - assert(A->getOption().matches(options::OPT_freg_struct_return)); - CmdArgs.push_back("-freg-struct-return"); - } - } - - if (Args.hasFlag(options::OPT_mrtd, options::OPT_mno_rtd, false)) - CmdArgs.push_back("-fdefault-calling-conv=stdcall"); - - if (shouldUseFramePointer(Args, getToolChain().getTriple())) - CmdArgs.push_back("-mdisable-fp-elim"); - if (!Args.hasFlag(options::OPT_fzero_initialized_in_bss, - options::OPT_fno_zero_initialized_in_bss)) - CmdArgs.push_back("-mno-zero-initialized-in-bss"); - - bool OFastEnabled = isOptimizationLevelFast(Args); - // If -Ofast is the optimization level, then -fstrict-aliasing should be - // enabled. This alias option is being used to simplify the hasFlag logic. - OptSpecifier StrictAliasingAliasOption = - OFastEnabled ? options::OPT_Ofast : options::OPT_fstrict_aliasing; - // We turn strict aliasing off by default if we're in CL mode, since MSVC - // doesn't do any TBAA. 
- bool TBAAOnByDefault = !getToolChain().getDriver().IsCLMode(); - if (!Args.hasFlag(options::OPT_fstrict_aliasing, StrictAliasingAliasOption, - options::OPT_fno_strict_aliasing, TBAAOnByDefault)) - CmdArgs.push_back("-relaxed-aliasing"); - if (!Args.hasFlag(options::OPT_fstruct_path_tbaa, - options::OPT_fno_struct_path_tbaa)) - CmdArgs.push_back("-no-struct-path-tbaa"); - if (Args.hasFlag(options::OPT_fstrict_enums, options::OPT_fno_strict_enums, - false)) - CmdArgs.push_back("-fstrict-enums"); - if (Args.hasFlag(options::OPT_fstrict_vtable_pointers, - options::OPT_fno_strict_vtable_pointers, - false)) - CmdArgs.push_back("-fstrict-vtable-pointers"); - if (!Args.hasFlag(options::OPT_foptimize_sibling_calls, - options::OPT_fno_optimize_sibling_calls)) - CmdArgs.push_back("-mdisable-tail-calls"); - - // Handle segmented stacks. - if (Args.hasArg(options::OPT_fsplit_stack)) - CmdArgs.push_back("-split-stacks"); - - // If -Ofast is the optimization level, then -ffast-math should be enabled. - // This alias option is being used to simplify the getLastArg logic. - OptSpecifier FastMathAliasOption = - OFastEnabled ? options::OPT_Ofast : options::OPT_ffast_math; - - // Handle various floating point optimization flags, mapping them to the - // appropriate LLVM code generation flags. The pattern for all of these is to - // default off the codegen optimizations, and if any flag enables them and no - // flag disables them after the flag enabling them, enable the codegen - // optimization. This is complicated by several "umbrella" flags. - if (Arg *A = Args.getLastArg( - options::OPT_ffast_math, FastMathAliasOption, - options::OPT_fno_fast_math, options::OPT_ffinite_math_only, - options::OPT_fno_finite_math_only, options::OPT_fhonor_infinities, - options::OPT_fno_honor_infinities)) - if (A->getOption().getID() != options::OPT_fno_fast_math && - A->getOption().getID() != options::OPT_fno_finite_math_only && - A->getOption().getID() != options::OPT_fhonor_infinities) - CmdArgs.push_back("-menable-no-infs"); - if (Arg *A = Args.getLastArg( - options::OPT_ffast_math, FastMathAliasOption, - options::OPT_fno_fast_math, options::OPT_ffinite_math_only, - options::OPT_fno_finite_math_only, options::OPT_fhonor_nans, - options::OPT_fno_honor_nans)) - if (A->getOption().getID() != options::OPT_fno_fast_math && - A->getOption().getID() != options::OPT_fno_finite_math_only && - A->getOption().getID() != options::OPT_fhonor_nans) - CmdArgs.push_back("-menable-no-nans"); - - // -fmath-errno is the default on some platforms, e.g. BSD-derived OSes. - bool MathErrno = getToolChain().IsMathErrnoDefault(); - if (Arg *A = - Args.getLastArg(options::OPT_ffast_math, FastMathAliasOption, - options::OPT_fno_fast_math, options::OPT_fmath_errno, - options::OPT_fno_math_errno)) { - // Turning on -ffast-math (with either flag) removes the need for MathErrno. - // However, turning *off* -ffast-math merely restores the toolchain default - // (which may be false). - if (A->getOption().getID() == options::OPT_fno_math_errno || - A->getOption().getID() == options::OPT_ffast_math || - A->getOption().getID() == options::OPT_Ofast) - MathErrno = false; - else if (A->getOption().getID() == options::OPT_fmath_errno) - MathErrno = true; - } - if (MathErrno) - CmdArgs.push_back("-fmath-errno"); - - // There are several flags which require disabling very specific - // optimizations. Any of these being disabled forces us to turn off the - // entire set of LLVM optimizations, so collect them through all the flag - // madness. 
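- // (Editor's note, not in the original file: the booleans computed below - // combine as !MathErrno && AssociativeMath && ReciprocalMath && - // !SignedZeros && !TrappingMath => -menable-unsafe-fp-math, i.e. every - // individual guarantee must be relaxed before the umbrella flag is emitted.)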
- bool AssociativeMath = false; - if (Arg *A = Args.getLastArg( - options::OPT_ffast_math, FastMathAliasOption, - options::OPT_fno_fast_math, options::OPT_funsafe_math_optimizations, - options::OPT_fno_unsafe_math_optimizations, - options::OPT_fassociative_math, options::OPT_fno_associative_math)) - if (A->getOption().getID() != options::OPT_fno_fast_math && - A->getOption().getID() != options::OPT_fno_unsafe_math_optimizations && - A->getOption().getID() != options::OPT_fno_associative_math) - AssociativeMath = true; - bool ReciprocalMath = false; - if (Arg *A = Args.getLastArg( - options::OPT_ffast_math, FastMathAliasOption, - options::OPT_fno_fast_math, options::OPT_funsafe_math_optimizations, - options::OPT_fno_unsafe_math_optimizations, - options::OPT_freciprocal_math, options::OPT_fno_reciprocal_math)) - if (A->getOption().getID() != options::OPT_fno_fast_math && - A->getOption().getID() != options::OPT_fno_unsafe_math_optimizations && - A->getOption().getID() != options::OPT_fno_reciprocal_math) - ReciprocalMath = true; - bool SignedZeros = true; - if (Arg *A = Args.getLastArg( - options::OPT_ffast_math, FastMathAliasOption, - options::OPT_fno_fast_math, options::OPT_funsafe_math_optimizations, - options::OPT_fno_unsafe_math_optimizations, - options::OPT_fsigned_zeros, options::OPT_fno_signed_zeros)) - if (A->getOption().getID() != options::OPT_fno_fast_math && - A->getOption().getID() != options::OPT_fno_unsafe_math_optimizations && - A->getOption().getID() != options::OPT_fsigned_zeros) - SignedZeros = false; - bool TrappingMath = true; - if (Arg *A = Args.getLastArg( - options::OPT_ffast_math, FastMathAliasOption, - options::OPT_fno_fast_math, options::OPT_funsafe_math_optimizations, - options::OPT_fno_unsafe_math_optimizations, - options::OPT_ftrapping_math, options::OPT_fno_trapping_math)) - if (A->getOption().getID() != options::OPT_fno_fast_math && - A->getOption().getID() != options::OPT_fno_unsafe_math_optimizations && - A->getOption().getID() != options::OPT_ftrapping_math) - TrappingMath = false; - if (!MathErrno && AssociativeMath && ReciprocalMath && !SignedZeros && - !TrappingMath) - CmdArgs.push_back("-menable-unsafe-fp-math"); - - if (!SignedZeros) - CmdArgs.push_back("-fno-signed-zeros"); - - if (ReciprocalMath) - CmdArgs.push_back("-freciprocal-math"); - - if (!TrappingMath) - CmdArgs.push_back("-fno-trapping-math"); - - - if (Arg *A = Args.getLastArg(options::OPT_ffast_math, FastMathAliasOption, - options::OPT_fno_fast_math, - options::OPT_funsafe_math_optimizations, - options::OPT_fno_unsafe_math_optimizations, - options::OPT_fdenormal_fp_math_EQ)) - if (A->getOption().getID() != options::OPT_fno_fast_math && - A->getOption().getID() != options::OPT_fno_unsafe_math_optimizations) - Args.AddLastArg(CmdArgs, options::OPT_fdenormal_fp_math_EQ); - - // Validate and pass through -fp-contract option. - if (Arg *A = Args.getLastArg(options::OPT_ffast_math, FastMathAliasOption, - options::OPT_fno_fast_math, - options::OPT_ffp_contract)) { - if (A->getOption().getID() == options::OPT_ffp_contract) { - StringRef Val = A->getValue(); - if (Val == "fast" || Val == "on" || Val == "off") { - CmdArgs.push_back(Args.MakeArgString("-ffp-contract=" + Val)); - } else { - D.Diag(diag::err_drv_unsupported_option_argument) - << A->getOption().getName() << Val; - } - } else if (A->getOption().matches(options::OPT_ffast_math) || - (OFastEnabled && A->getOption().matches(options::OPT_Ofast))) { - // If fast-math is set then set the fp-contract mode to fast. 
- CmdArgs.push_back(Args.MakeArgString("-ffp-contract=fast")); - } - } - - ParseMRecip(getToolChain().getDriver(), Args, CmdArgs); - - // We separately look for the '-ffast-math' and '-ffinite-math-only' flags, - // and if we find them, tell the frontend to provide the appropriate - // preprocessor macros. This is distinct from enabling any optimizations as - // these options induce language changes which must survive serialization - // and deserialization, etc. - if (Arg *A = Args.getLastArg(options::OPT_ffast_math, FastMathAliasOption, - options::OPT_fno_fast_math)) - if (!A->getOption().matches(options::OPT_fno_fast_math)) - CmdArgs.push_back("-ffast-math"); - if (Arg *A = Args.getLastArg(options::OPT_ffinite_math_only, - options::OPT_fno_fast_math)) - if (A->getOption().matches(options::OPT_ffinite_math_only)) - CmdArgs.push_back("-ffinite-math-only"); - - // Decide whether to use verbose asm. Verbose assembly is the default on - // toolchains which have the integrated assembler on by default. - bool IsIntegratedAssemblerDefault = - getToolChain().IsIntegratedAssemblerDefault(); - if (Args.hasFlag(options::OPT_fverbose_asm, options::OPT_fno_verbose_asm, - IsIntegratedAssemblerDefault) || - Args.hasArg(options::OPT_dA)) - CmdArgs.push_back("-masm-verbose"); - - if (!Args.hasFlag(options::OPT_fintegrated_as, options::OPT_fno_integrated_as, - IsIntegratedAssemblerDefault)) - CmdArgs.push_back("-no-integrated-as"); - - if (Args.hasArg(options::OPT_fdebug_pass_structure)) { - CmdArgs.push_back("-mdebug-pass"); - CmdArgs.push_back("Structure"); - } - if (Args.hasArg(options::OPT_fdebug_pass_arguments)) { - CmdArgs.push_back("-mdebug-pass"); - CmdArgs.push_back("Arguments"); - } - - // Enable -mconstructor-aliases except on Darwin, where we have to work - // around a linker bug, and CUDA device code, where aliases aren't supported. - if (!getToolChain().getTriple().isOSDarwin() && - !getToolChain().getTriple().isNVPTX()) - CmdArgs.push_back("-mconstructor-aliases"); - - // Darwin's kernel doesn't support guard variables; just die if we - // try to use them. - if (KernelOrKext && getToolChain().getTriple().isOSDarwin()) - CmdArgs.push_back("-fforbid-guard-variables"); - - if (Args.hasFlag(options::OPT_mms_bitfields, options::OPT_mno_ms_bitfields, - false)) { - CmdArgs.push_back("-mms-bitfields"); - } - - if (Args.hasFlag(options::OPT_mpie_copy_relocations, - options::OPT_mno_pie_copy_relocations, - false)) { - CmdArgs.push_back("-mpie-copy-relocations"); - } - - // This is a coarse approximation of what llvm-gcc actually does; both - // -fasynchronous-unwind-tables and -fnon-call-exceptions interact in more - // complicated ways. - bool AsynchronousUnwindTables = - Args.hasFlag(options::OPT_fasynchronous_unwind_tables, - options::OPT_fno_asynchronous_unwind_tables, - (getToolChain().IsUnwindTablesDefault() || - getToolChain().getSanitizerArgs().needsUnwindTables()) && - !KernelOrKext); - if (Args.hasFlag(options::OPT_funwind_tables, options::OPT_fno_unwind_tables, - AsynchronousUnwindTables)) - CmdArgs.push_back("-munwind-tables"); - - getToolChain().addClangTargetOptions(Args, CmdArgs); - - if (Arg *A = Args.getLastArg(options::OPT_flimited_precision_EQ)) { - CmdArgs.push_back("-mlimit-float-precision"); - CmdArgs.push_back(A->getValue()); - } - - // FIXME: Handle -mtune=. 
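- // (Editor's note, not in the original file: the (void) cast below merely - // claims -mtune= so the driver does not warn about an unused argument; the - // value itself is dropped, as the FIXME above notes.)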
- (void)Args.hasArg(options::OPT_mtune_EQ); - - if (Arg *A = Args.getLastArg(options::OPT_mcmodel_EQ)) { - CmdArgs.push_back("-mcode-model"); - CmdArgs.push_back(A->getValue()); - } - - // Add the target cpu - std::string CPU = getCPUName(Args, Triple, /*FromAs*/ false); - if (!CPU.empty()) { - CmdArgs.push_back("-target-cpu"); - CmdArgs.push_back(Args.MakeArgString(CPU)); - } - - if (const Arg *A = Args.getLastArg(options::OPT_mfpmath_EQ)) { - CmdArgs.push_back("-mfpmath"); - CmdArgs.push_back(A->getValue()); - } - - // Add the target features - getTargetFeatures(getToolChain(), Triple, Args, CmdArgs, false); - - // Add target specific flags. - switch (getToolChain().getArch()) { - default: - break; - - case llvm::Triple::arm: - case llvm::Triple::armeb: - case llvm::Triple::thumb: - case llvm::Triple::thumbeb: - // Use the effective triple, which takes into account the deployment target. - AddARMTargetArgs(Triple, Args, CmdArgs, KernelOrKext); - break; - - case llvm::Triple::aarch64: - case llvm::Triple::aarch64_be: - AddAArch64TargetArgs(Args, CmdArgs); - break; - - case llvm::Triple::mips: - case llvm::Triple::mipsel: - case llvm::Triple::mips64: - case llvm::Triple::mips64el: - AddMIPSTargetArgs(Args, CmdArgs); - break; - - case llvm::Triple::ppc: - case llvm::Triple::ppc64: - case llvm::Triple::ppc64le: - AddPPCTargetArgs(Args, CmdArgs); - break; - - case llvm::Triple::sparc: - case llvm::Triple::sparcel: - case llvm::Triple::sparcv9: - AddSparcTargetArgs(Args, CmdArgs); - break; - - case llvm::Triple::systemz: - AddSystemZTargetArgs(Args, CmdArgs); - break; - - case llvm::Triple::x86: - case llvm::Triple::x86_64: - AddX86TargetArgs(Args, CmdArgs); - break; - - case llvm::Triple::lanai: - AddLanaiTargetArgs(Args, CmdArgs); - break; - - case llvm::Triple::hexagon: - AddHexagonTargetArgs(Args, CmdArgs); - break; - - case llvm::Triple::wasm32: - case llvm::Triple::wasm64: - AddWebAssemblyTargetArgs(Args, CmdArgs); - break; - } - - // The 'g' groups options involve a somewhat intricate sequence of decisions - // about what to pass from the driver to the frontend, but by the time they - // reach cc1 they've been factored into three well-defined orthogonal choices: - // * what level of debug info to generate - // * what dwarf version to write - // * what debugger tuning to use - // This avoids having to monkey around further in cc1 other than to disable - // codeview if not running in a Windows environment. Perhaps even that - // decision should be made in the driver as well though. - unsigned DwarfVersion = 0; - llvm::DebuggerKind DebuggerTuning = getToolChain().getDefaultDebuggerTuning(); - // These two are potentially updated by AddClangCLArgs. - codegenoptions::DebugInfoKind DebugInfoKind = codegenoptions::NoDebugInfo; - bool EmitCodeView = false; - - // Add clang-cl arguments. - types::ID InputType = Input.getType(); - if (getToolChain().getDriver().IsCLMode()) - AddClangCLArgs(Args, InputType, CmdArgs, &DebugInfoKind, &EmitCodeView); - - // Pass the linker version in use. - if (Arg *A = Args.getLastArg(options::OPT_mlinker_version_EQ)) { - CmdArgs.push_back("-target-linker-version"); - CmdArgs.push_back(A->getValue()); - } - - if (!shouldUseLeafFramePointer(Args, getToolChain().getTriple())) - CmdArgs.push_back("-momit-leaf-frame-pointer"); - - // Explicitly error on some things we know we don't support and can't just - // ignore. 
- if (!Args.hasArg(options::OPT_fallow_unsupported)) { - Arg *Unsupported; - if (types::isCXX(InputType) && getToolChain().getTriple().isOSDarwin() && - getToolChain().getArch() == llvm::Triple::x86) { - if ((Unsupported = Args.getLastArg(options::OPT_fapple_kext)) || - (Unsupported = Args.getLastArg(options::OPT_mkernel))) - D.Diag(diag::err_drv_clang_unsupported_opt_cxx_darwin_i386) - << Unsupported->getOption().getName(); - } - } - - Args.AddAllArgs(CmdArgs, options::OPT_v); - Args.AddLastArg(CmdArgs, options::OPT_H); - if (D.CCPrintHeaders && !D.CCGenDiagnostics) { - CmdArgs.push_back("-header-include-file"); - CmdArgs.push_back(D.CCPrintHeadersFilename ? D.CCPrintHeadersFilename - : "-"); - } - Args.AddLastArg(CmdArgs, options::OPT_P); - Args.AddLastArg(CmdArgs, options::OPT_print_ivar_layout); - - if (D.CCLogDiagnostics && !D.CCGenDiagnostics) { - CmdArgs.push_back("-diagnostic-log-file"); - CmdArgs.push_back(D.CCLogDiagnosticsFilename ? D.CCLogDiagnosticsFilename - : "-"); - } - - bool splitDwarfInlining = - Args.hasFlag(options::OPT_fsplit_dwarf_inlining, - options::OPT_fno_split_dwarf_inlining, true); - - Args.ClaimAllArgs(options::OPT_g_Group); - Arg *SplitDwarfArg = Args.getLastArg(options::OPT_gsplit_dwarf); - if (Arg *A = Args.getLastArg(options::OPT_g_Group)) { - // If the last option explicitly specified a debug-info level, use it. - if (A->getOption().matches(options::OPT_gN_Group)) { - DebugInfoKind = DebugLevelToInfoKind(*A); - // If you say "-gsplit-dwarf -gline-tables-only", -gsplit-dwarf loses. - // But -gsplit-dwarf is not a g_group option, hence we have to check the - // order explicitly. (If -gsplit-dwarf wins, we fix DebugInfoKind later.) - // This gets a bit more complicated if you've disabled inline info in the - // skeleton CUs (splitDwarfInlining) - then there's value in composing - // split-dwarf and line-tables-only, so let those compose naturally in - // that case. - // And if you just turned off debug info, (-gsplit-dwarf -g0) - do that. - if (SplitDwarfArg) { - if (A->getIndex() > SplitDwarfArg->getIndex()) { - if (DebugInfoKind == codegenoptions::NoDebugInfo || - (DebugInfoKind == codegenoptions::DebugLineTablesOnly && - splitDwarfInlining)) - SplitDwarfArg = nullptr; - } else if (splitDwarfInlining) - DebugInfoKind = codegenoptions::NoDebugInfo; - } - } else - // For any other 'g' option, use Limited. - DebugInfoKind = codegenoptions::LimitedDebugInfo; - } - - // If a debugger tuning argument appeared, remember it. - if (Arg *A = Args.getLastArg(options::OPT_gTune_Group, - options::OPT_ggdbN_Group)) { - if (A->getOption().matches(options::OPT_glldb)) - DebuggerTuning = llvm::DebuggerKind::LLDB; - else if (A->getOption().matches(options::OPT_gsce)) - DebuggerTuning = llvm::DebuggerKind::SCE; - else - DebuggerTuning = llvm::DebuggerKind::GDB; - } - - // If a -gdwarf argument appeared, remember it. - if (Arg *A = Args.getLastArg(options::OPT_gdwarf_2, options::OPT_gdwarf_3, - options::OPT_gdwarf_4, options::OPT_gdwarf_5)) - DwarfVersion = DwarfVersionNum(A->getSpelling()); - - // Forward -gcodeview. EmitCodeView might have been set by CL-compatibility - // argument parsing. - if (Args.hasArg(options::OPT_gcodeview) || EmitCodeView) { - // DwarfVersion remains at 0 if no explicit choice was made. - CmdArgs.push_back("-gcodeview"); - } else if (DwarfVersion == 0 && - DebugInfoKind != codegenoptions::NoDebugInfo) { - DwarfVersion = getToolChain().GetDefaultDwarfVersion(); - } - - // We ignore flags -gstrict-dwarf and -grecord-gcc-switches for now. 
- Args.ClaimAllArgs(options::OPT_g_flags_Group); - - // Column info is included by default for everything except PS4 and CodeView. - // Clang doesn't track end columns, just starting columns, which, in theory, - // is fine for CodeView (and PDB). In practice, however, the Microsoft - // debuggers don't handle missing end columns well, so it's better not to - // include any column info. - if (Args.hasFlag(options::OPT_gcolumn_info, options::OPT_gno_column_info, - /*Default=*/ !IsPS4CPU && !(IsWindowsMSVC && EmitCodeView))) - CmdArgs.push_back("-dwarf-column-info"); - - // FIXME: Move backend command line options to the module. - // If -gline-tables-only is the last option it wins. - if (DebugInfoKind != codegenoptions::DebugLineTablesOnly && - Args.hasArg(options::OPT_gmodules)) { - DebugInfoKind = codegenoptions::LimitedDebugInfo; - CmdArgs.push_back("-dwarf-ext-refs"); - CmdArgs.push_back("-fmodule-format=obj"); - } - - // -gsplit-dwarf should turn on -g and enable the backend dwarf - // splitting and extraction. - // FIXME: Currently only works on Linux. - if (getToolChain().getTriple().isOSLinux() && SplitDwarfArg) { - if (!splitDwarfInlining) - CmdArgs.push_back("-fno-split-dwarf-inlining"); - if (DebugInfoKind == codegenoptions::NoDebugInfo) - DebugInfoKind = codegenoptions::LimitedDebugInfo; - CmdArgs.push_back("-backend-option"); - CmdArgs.push_back("-split-dwarf=Enable"); - } - - // After we've dealt with all combinations of things that could - // make DebugInfoKind be other than None or DebugLineTablesOnly, - // figure out if we need to "upgrade" it to standalone debug info. - // We parse these two '-f' options whether or not they will be used, - // to claim them even if you wrote "-fstandalone-debug -gline-tables-only" - bool NeedFullDebug = Args.hasFlag(options::OPT_fstandalone_debug, - options::OPT_fno_standalone_debug, - getToolChain().GetDefaultStandaloneDebug()); - if (DebugInfoKind == codegenoptions::LimitedDebugInfo && NeedFullDebug) - DebugInfoKind = codegenoptions::FullDebugInfo; - RenderDebugEnablingArgs(Args, CmdArgs, DebugInfoKind, DwarfVersion, - DebuggerTuning); - - // -ggnu-pubnames turns on gnu style pubnames in the backend. - if (Args.hasArg(options::OPT_ggnu_pubnames)) { - CmdArgs.push_back("-backend-option"); - CmdArgs.push_back("-generate-gnu-dwarf-pub-sections"); - } - - // -gdwarf-aranges turns on the emission of the aranges section in the - // backend. - // Always enabled on the PS4. 
- if (Args.hasArg(options::OPT_gdwarf_aranges) || IsPS4CPU) { - CmdArgs.push_back("-backend-option"); - CmdArgs.push_back("-generate-arange-section"); - } - - if (Args.hasFlag(options::OPT_fdebug_types_section, - options::OPT_fno_debug_types_section, false)) { - CmdArgs.push_back("-backend-option"); - CmdArgs.push_back("-generate-type-units"); - } - - bool UseSeparateSections = isUseSeparateSections(Triple); - - if (Args.hasFlag(options::OPT_ffunction_sections, - options::OPT_fno_function_sections, UseSeparateSections)) { - CmdArgs.push_back("-ffunction-sections"); - } - - if (Args.hasFlag(options::OPT_fdata_sections, options::OPT_fno_data_sections, - UseSeparateSections)) { - CmdArgs.push_back("-fdata-sections"); - } - - if (!Args.hasFlag(options::OPT_funique_section_names, - options::OPT_fno_unique_section_names, true)) - CmdArgs.push_back("-fno-unique-section-names"); - - Args.AddAllArgs(CmdArgs, options::OPT_finstrument_functions); - - if (Args.hasFlag(options::OPT_fxray_instrument, - options::OPT_fnoxray_instrument, false)) { - const char *const XRayInstrumentOption = "-fxray-instrument"; - if (Triple.getOS() == llvm::Triple::Linux) - switch (Triple.getArch()) { - case llvm::Triple::x86_64: - case llvm::Triple::arm: - case llvm::Triple::aarch64: - // Supported. - break; - default: - D.Diag(diag::err_drv_clang_unsupported) - << (std::string(XRayInstrumentOption) + " on " + Triple.str()); - } - else - D.Diag(diag::err_drv_clang_unsupported) - << (std::string(XRayInstrumentOption) + " on non-Linux target OS"); - CmdArgs.push_back(XRayInstrumentOption); - if (const Arg *A = - Args.getLastArg(options::OPT_fxray_instruction_threshold_, - options::OPT_fxray_instruction_threshold_EQ)) { - CmdArgs.push_back("-fxray-instruction-threshold"); - CmdArgs.push_back(A->getValue()); - } - } - - addPGOAndCoverageFlags(C, D, Output, Args, CmdArgs); - - // Add runtime flag for PS4 when PGO or Coverage are enabled. - if (getToolChain().getTriple().isPS4CPU()) - addPS4ProfileRTArgs(getToolChain(), Args, CmdArgs); - - // Pass options for controlling the default header search paths. - if (Args.hasArg(options::OPT_nostdinc)) { - CmdArgs.push_back("-nostdsysteminc"); - CmdArgs.push_back("-nobuiltininc"); - } else { - if (Args.hasArg(options::OPT_nostdlibinc)) - CmdArgs.push_back("-nostdsysteminc"); - Args.AddLastArg(CmdArgs, options::OPT_nostdincxx); - Args.AddLastArg(CmdArgs, options::OPT_nobuiltininc); - } - - // Pass the path to compiler resource files. 
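- // (Editor's note, not in the original file: D.ResourceDir typically points - // at something like <install-prefix>/lib/clang/<version>, where clang's own - // headers and runtime libraries live.)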
- CmdArgs.push_back("-resource-dir"); - CmdArgs.push_back(D.ResourceDir.c_str()); - - Args.AddLastArg(CmdArgs, options::OPT_working_directory); - - bool ARCMTEnabled = false; - if (!Args.hasArg(options::OPT_fno_objc_arc, options::OPT_fobjc_arc)) { - if (const Arg *A = Args.getLastArg(options::OPT_ccc_arcmt_check, - options::OPT_ccc_arcmt_modify, - options::OPT_ccc_arcmt_migrate)) { - ARCMTEnabled = true; - switch (A->getOption().getID()) { - default: - llvm_unreachable("missed a case"); - case options::OPT_ccc_arcmt_check: - CmdArgs.push_back("-arcmt-check"); - break; - case options::OPT_ccc_arcmt_modify: - CmdArgs.push_back("-arcmt-modify"); - break; - case options::OPT_ccc_arcmt_migrate: - CmdArgs.push_back("-arcmt-migrate"); - CmdArgs.push_back("-mt-migrate-directory"); - CmdArgs.push_back(A->getValue()); - - Args.AddLastArg(CmdArgs, options::OPT_arcmt_migrate_report_output); - Args.AddLastArg(CmdArgs, options::OPT_arcmt_migrate_emit_arc_errors); - break; - } - } - } else { - Args.ClaimAllArgs(options::OPT_ccc_arcmt_check); - Args.ClaimAllArgs(options::OPT_ccc_arcmt_modify); - Args.ClaimAllArgs(options::OPT_ccc_arcmt_migrate); - } - - if (const Arg *A = Args.getLastArg(options::OPT_ccc_objcmt_migrate)) { - if (ARCMTEnabled) { - D.Diag(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args) - << "-ccc-arcmt-migrate"; - } - CmdArgs.push_back("-mt-migrate-directory"); - CmdArgs.push_back(A->getValue()); - - if (!Args.hasArg(options::OPT_objcmt_migrate_literals, - options::OPT_objcmt_migrate_subscripting, - options::OPT_objcmt_migrate_property)) { - // None specified, means enable them all. - CmdArgs.push_back("-objcmt-migrate-literals"); - CmdArgs.push_back("-objcmt-migrate-subscripting"); - CmdArgs.push_back("-objcmt-migrate-property"); - } else { - Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_literals); - Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_subscripting); - Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_property); - } - } else { - Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_literals); - Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_subscripting); - Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_property); - Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_all); - Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_readonly_property); - Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_readwrite_property); - Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_property_dot_syntax); - Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_annotation); - Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_instancetype); - Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_nsmacros); - Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_protocol_conformance); - Args.AddLastArg(CmdArgs, options::OPT_objcmt_atomic_property); - Args.AddLastArg(CmdArgs, options::OPT_objcmt_returns_innerpointer_property); - Args.AddLastArg(CmdArgs, options::OPT_objcmt_ns_nonatomic_iosonly); - Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_designated_init); - Args.AddLastArg(CmdArgs, options::OPT_objcmt_whitelist_dir_path); - } - - // Add preprocessing options like -I, -D, etc. if we are using the - // preprocessor. 
- // - // FIXME: Support -fpreprocessed - if (types::getPreprocessedType(InputType) != types::TY_INVALID) - AddPreprocessingOptions(C, JA, D, Args, CmdArgs, Output, Inputs); - - // Don't warn about "clang -c -DPIC -fPIC test.i" because libtool.m4 assumes - // that "The compiler can only warn and ignore the option if not recognized". - // When building with ccache, it will pass -D options to clang even on - // preprocessed inputs and configure concludes that -fPIC is not supported. - Args.ClaimAllArgs(options::OPT_D); - - // Manually translate -O4 to -O3; let clang reject others. - if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { - if (A->getOption().matches(options::OPT_O4)) { - CmdArgs.push_back("-O3"); - D.Diag(diag::warn_O4_is_O3); - } else { - A->render(Args, CmdArgs); - } - } - - // Warn about ignored options to clang. - for (const Arg *A : - Args.filtered(options::OPT_clang_ignored_gcc_optimization_f_Group)) { - D.Diag(diag::warn_ignored_gcc_optimization) << A->getAsString(Args); - A->claim(); - } - - claimNoWarnArgs(Args); - - Args.AddAllArgs(CmdArgs, options::OPT_R_Group); - - Args.AddAllArgs(CmdArgs, options::OPT_W_Group); - if (Args.hasFlag(options::OPT_pedantic, options::OPT_no_pedantic, false)) - CmdArgs.push_back("-pedantic"); - Args.AddLastArg(CmdArgs, options::OPT_pedantic_errors); - Args.AddLastArg(CmdArgs, options::OPT_w); - - // Handle -{std, ansi, trigraphs} -- take the last of -{std, ansi} - // (-ansi is equivalent to -std=c89 or -std=c++98). - // - // If a std is supplied, only add -trigraphs if it follows the - // option. - bool ImplyVCPPCXXVer = false; - if (Arg *Std = Args.getLastArg(options::OPT_std_EQ, options::OPT_ansi)) { - if (Std->getOption().matches(options::OPT_ansi)) - if (types::isCXX(InputType)) - CmdArgs.push_back("-std=c++98"); - else - CmdArgs.push_back("-std=c89"); - else - Std->render(Args, CmdArgs); - - // If -f(no-)trigraphs appears after the language standard flag, honor it. - if (Arg *A = Args.getLastArg(options::OPT_std_EQ, options::OPT_ansi, - options::OPT_ftrigraphs, - options::OPT_fno_trigraphs)) - if (A != Std) - A->render(Args, CmdArgs); - } else { - // Honor -std-default. - // - // FIXME: Clang doesn't correctly handle -std= when the input language - // doesn't match. For the time being just ignore this for C++ inputs; - // eventually we want to do all the standard defaulting here instead of - // splitting it between the driver and clang -cc1. - if (!types::isCXX(InputType)) - Args.AddAllArgsTranslated(CmdArgs, options::OPT_std_default_EQ, "-std=", - /*Joined=*/true); - else if (IsWindowsMSVC) - ImplyVCPPCXXVer = true; - - Args.AddLastArg(CmdArgs, options::OPT_ftrigraphs, - options::OPT_fno_trigraphs); - } - - // GCC's behavior for -Wwrite-strings is a bit strange: - // * In C, this "warning flag" changes the types of string literals from - // 'char[N]' to 'const char[N]', and thus triggers an unrelated warning - // for the discarded qualifier. - // * In C++, this is just a normal warning flag. - // - // Implementing this warning correctly in C is hard, so we follow GCC's - // behavior for now. FIXME: Directly diagnose uses of a string literal as - // a non-const char* in C, rather than using this crude hack. - if (!types::isCXX(InputType)) { - // FIXME: This should behave just like a warning flag, and thus should also - // respect -Weverything, -Wno-everything, -Werror=write-strings, and so on. 
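- // (Editor's note, not in the original file: in C mode, GCC's -Wwrite-strings - // gives string literals the type 'const char[N]'; the -fconst-strings cc1 - // flag pushed below reproduces exactly that.)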
- Arg *WriteStrings = - Args.getLastArg(options::OPT_Wwrite_strings, - options::OPT_Wno_write_strings, options::OPT_w); - if (WriteStrings && - WriteStrings->getOption().matches(options::OPT_Wwrite_strings)) - CmdArgs.push_back("-fconst-strings"); - } - - // GCC provides a macro definition '__DEPRECATED' when -Wdeprecated is active - // during C++ compilation, which it is by default. GCC keeps this define even - // in the presence of '-w', match this behavior bug-for-bug. - if (types::isCXX(InputType) && - Args.hasFlag(options::OPT_Wdeprecated, options::OPT_Wno_deprecated, - true)) { - CmdArgs.push_back("-fdeprecated-macro"); - } - - // Translate GCC's misnamer '-fasm' arguments to '-fgnu-keywords'. - if (Arg *Asm = Args.getLastArg(options::OPT_fasm, options::OPT_fno_asm)) { - if (Asm->getOption().matches(options::OPT_fasm)) - CmdArgs.push_back("-fgnu-keywords"); - else - CmdArgs.push_back("-fno-gnu-keywords"); - } - - if (ShouldDisableDwarfDirectory(Args, getToolChain())) - CmdArgs.push_back("-fno-dwarf-directory-asm"); - - if (ShouldDisableAutolink(Args, getToolChain())) - CmdArgs.push_back("-fno-autolink"); - - // Add in -fdebug-compilation-dir if necessary. - addDebugCompDirArg(Args, CmdArgs); - - for (const Arg *A : Args.filtered(options::OPT_fdebug_prefix_map_EQ)) { - StringRef Map = A->getValue(); - if (Map.find('=') == StringRef::npos) - D.Diag(diag::err_drv_invalid_argument_to_fdebug_prefix_map) << Map; - else - CmdArgs.push_back(Args.MakeArgString("-fdebug-prefix-map=" + Map)); - A->claim(); - } - - if (Arg *A = Args.getLastArg(options::OPT_ftemplate_depth_, - options::OPT_ftemplate_depth_EQ)) { - CmdArgs.push_back("-ftemplate-depth"); - CmdArgs.push_back(A->getValue()); - } - - if (Arg *A = Args.getLastArg(options::OPT_foperator_arrow_depth_EQ)) { - CmdArgs.push_back("-foperator-arrow-depth"); - CmdArgs.push_back(A->getValue()); - } - - if (Arg *A = Args.getLastArg(options::OPT_fconstexpr_depth_EQ)) { - CmdArgs.push_back("-fconstexpr-depth"); - CmdArgs.push_back(A->getValue()); - } - - if (Arg *A = Args.getLastArg(options::OPT_fconstexpr_steps_EQ)) { - CmdArgs.push_back("-fconstexpr-steps"); - CmdArgs.push_back(A->getValue()); - } - - if (Arg *A = Args.getLastArg(options::OPT_fbracket_depth_EQ)) { - CmdArgs.push_back("-fbracket-depth"); - CmdArgs.push_back(A->getValue()); - } - - if (Arg *A = Args.getLastArg(options::OPT_Wlarge_by_value_copy_EQ, - options::OPT_Wlarge_by_value_copy_def)) { - if (A->getNumValues()) { - StringRef bytes = A->getValue(); - CmdArgs.push_back(Args.MakeArgString("-Wlarge-by-value-copy=" + bytes)); - } else - CmdArgs.push_back("-Wlarge-by-value-copy=64"); // default value - } - - if (Args.hasArg(options::OPT_relocatable_pch)) - CmdArgs.push_back("-relocatable-pch"); - - if (Arg *A = Args.getLastArg(options::OPT_fconstant_string_class_EQ)) { - CmdArgs.push_back("-fconstant-string-class"); - CmdArgs.push_back(A->getValue()); - } - - if (Arg *A = Args.getLastArg(options::OPT_ftabstop_EQ)) { - CmdArgs.push_back("-ftabstop"); - CmdArgs.push_back(A->getValue()); - } - - CmdArgs.push_back("-ferror-limit"); - if (Arg *A = Args.getLastArg(options::OPT_ferror_limit_EQ)) - CmdArgs.push_back(A->getValue()); - else - CmdArgs.push_back("19"); - - if (Arg *A = Args.getLastArg(options::OPT_fmacro_backtrace_limit_EQ)) { - CmdArgs.push_back("-fmacro-backtrace-limit"); - CmdArgs.push_back(A->getValue()); - } - - if (Arg *A = Args.getLastArg(options::OPT_ftemplate_backtrace_limit_EQ)) { - CmdArgs.push_back("-ftemplate-backtrace-limit"); - CmdArgs.push_back(A->getValue()); - 
} - - if (Arg *A = Args.getLastArg(options::OPT_fconstexpr_backtrace_limit_EQ)) { - CmdArgs.push_back("-fconstexpr-backtrace-limit"); - CmdArgs.push_back(A->getValue()); - } - - if (Arg *A = Args.getLastArg(options::OPT_fspell_checking_limit_EQ)) { - CmdArgs.push_back("-fspell-checking-limit"); - CmdArgs.push_back(A->getValue()); - } - - // Pass -fmessage-length=. - CmdArgs.push_back("-fmessage-length"); - if (Arg *A = Args.getLastArg(options::OPT_fmessage_length_EQ)) { - CmdArgs.push_back(A->getValue()); - } else { - // If -fmessage-length=N was not specified, determine whether this is a - // terminal and, if so, implicitly define -fmessage-length appropriately. - unsigned N = llvm::sys::Process::StandardErrColumns(); - CmdArgs.push_back(Args.MakeArgString(Twine(N))); - } - - // -fvisibility= and -fvisibility-ms-compat are of a piece. - if (const Arg *A = Args.getLastArg(options::OPT_fvisibility_EQ, - options::OPT_fvisibility_ms_compat)) { - if (A->getOption().matches(options::OPT_fvisibility_EQ)) { - CmdArgs.push_back("-fvisibility"); - CmdArgs.push_back(A->getValue()); - } else { - assert(A->getOption().matches(options::OPT_fvisibility_ms_compat)); - CmdArgs.push_back("-fvisibility"); - CmdArgs.push_back("hidden"); - CmdArgs.push_back("-ftype-visibility"); - CmdArgs.push_back("default"); - } - } - - Args.AddLastArg(CmdArgs, options::OPT_fvisibility_inlines_hidden); - - Args.AddLastArg(CmdArgs, options::OPT_ftlsmodel_EQ); - - // -fhosted is default. - bool IsHosted = true; - if (Args.hasFlag(options::OPT_ffreestanding, options::OPT_fhosted, false) || - KernelOrKext) { - CmdArgs.push_back("-ffreestanding"); - IsHosted = false; - } - - // Forward -f (flag) options which we can pass directly. - Args.AddLastArg(CmdArgs, options::OPT_femit_all_decls); - Args.AddLastArg(CmdArgs, options::OPT_fheinous_gnu_extensions); - Args.AddLastArg(CmdArgs, options::OPT_fno_operator_names); - // Emulated TLS is enabled by default on Android, and can be enabled manually - // with -femulated-tls. - bool EmulatedTLSDefault = Triple.isAndroid() || Triple.isWindowsCygwinEnvironment(); - if (Args.hasFlag(options::OPT_femulated_tls, options::OPT_fno_emulated_tls, - EmulatedTLSDefault)) - CmdArgs.push_back("-femulated-tls"); - // AltiVec-like language extensions aren't relevant for assembling. - if (!isa<PreprocessJobAction>(JA) || Output.getType() != types::TY_PP_Asm) { - Args.AddLastArg(CmdArgs, options::OPT_faltivec); - Args.AddLastArg(CmdArgs, options::OPT_fzvector); - } - Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_show_template_tree); - Args.AddLastArg(CmdArgs, options::OPT_fno_elide_type); - - // Forward flags for OpenMP. We don't do this if the current action is a - // device offloading action other than OpenMP. - if (Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, - options::OPT_fno_openmp, false) && - (JA.isDeviceOffloading(Action::OFK_None) || - JA.isDeviceOffloading(Action::OFK_OpenMP))) { - switch (getToolChain().getDriver().getOpenMPRuntime(Args)) { - case Driver::OMPRT_OMP: - case Driver::OMPRT_IOMP5: - // Clang can generate useful OpenMP code for these two runtime libraries. - CmdArgs.push_back("-fopenmp"); - - // If no option regarding the use of TLS in OpenMP code generation is - // given, decide a default based on the target. Otherwise rely on the - // options and pass the right information to the frontend.
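// [Editor's sketch — annotation, not part of the original patch.] Almost every
// block in this function reduces to the driver's three-way test
// hasFlag(positive, negative, default): scan the command line, let the later
// of the two spellings win, and fall back to a target-derived default. A
// minimal model (illustrative, not the real llvm::opt::ArgList API):

#include <string>
#include <vector>

bool hasFlagModel(const std::vector<std::string> &argv, const std::string &pos,
                  const std::string &neg, bool def) {
  bool value = def;
  for (const std::string &a : argv) { // later flags override earlier ones
    if (a == pos)
      value = true;
    else if (a == neg)
      value = false;
  }
  return value;
}

// e.g. -femulated-tls above is forwarded iff
// hasFlagModel(argv, "-femulated-tls", "-fno-emulated-tls", EmulatedTLSDefault)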
- if (!Args.hasFlag(options::OPT_fopenmp_use_tls, - options::OPT_fnoopenmp_use_tls, /*Default=*/true)) - CmdArgs.push_back("-fnoopenmp-use-tls"); - Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_version_EQ); - break; - default: - // By default, if Clang doesn't know how to generate useful OpenMP code - // for a specific runtime library, we just don't pass the '-fopenmp' flag - // down to the actual compilation. - // FIXME: It would be better to have a mode which *only* omits IR - // generation based on the OpenMP support so that we get consistent - // semantic analysis, etc. - break; - } - } - - const SanitizerArgs &Sanitize = getToolChain().getSanitizerArgs(); - Sanitize.addArgs(getToolChain(), Args, CmdArgs, InputType); - - // Report an error for -faltivec on anything other than PowerPC. - if (const Arg *A = Args.getLastArg(options::OPT_faltivec)) { - const llvm::Triple::ArchType Arch = getToolChain().getArch(); - if (!(Arch == llvm::Triple::ppc || Arch == llvm::Triple::ppc64 || - Arch == llvm::Triple::ppc64le)) - D.Diag(diag::err_drv_argument_only_allowed_with) << A->getAsString(Args) - << "ppc/ppc64/ppc64le"; - } - - // -fzvector is incompatible with -faltivec. - if (Arg *A = Args.getLastArg(options::OPT_fzvector)) - if (Args.hasArg(options::OPT_faltivec)) - D.Diag(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args) - << "-faltivec"; - - if (getToolChain().SupportsProfiling()) - Args.AddLastArg(CmdArgs, options::OPT_pg); - - // -flax-vector-conversions is default. - if (!Args.hasFlag(options::OPT_flax_vector_conversions, - options::OPT_fno_lax_vector_conversions)) - CmdArgs.push_back("-fno-lax-vector-conversions"); - - if (Args.getLastArg(options::OPT_fapple_kext) || - (Args.hasArg(options::OPT_mkernel) && types::isCXX(InputType))) - CmdArgs.push_back("-fapple-kext"); - - Args.AddLastArg(CmdArgs, options::OPT_fobjc_sender_dependent_dispatch); - Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_print_source_range_info); - Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_parseable_fixits); - Args.AddLastArg(CmdArgs, options::OPT_ftime_report); - Args.AddLastArg(CmdArgs, options::OPT_ftrapv); - - if (Arg *A = Args.getLastArg(options::OPT_ftrapv_handler_EQ)) { - CmdArgs.push_back("-ftrapv-handler"); - CmdArgs.push_back(A->getValue()); - } - - Args.AddLastArg(CmdArgs, options::OPT_ftrap_function_EQ); - - // -fno-strict-overflow implies -fwrapv if it isn't disabled, but - // -fstrict-overflow won't turn off an explicitly enabled -fwrapv. - if (Arg *A = Args.getLastArg(options::OPT_fwrapv, options::OPT_fno_wrapv)) { - if (A->getOption().matches(options::OPT_fwrapv)) - CmdArgs.push_back("-fwrapv"); - } else if (Arg *A = Args.getLastArg(options::OPT_fstrict_overflow, - options::OPT_fno_strict_overflow)) { - if (A->getOption().matches(options::OPT_fno_strict_overflow)) - CmdArgs.push_back("-fwrapv"); - } - - if (Arg *A = Args.getLastArg(options::OPT_freroll_loops, - options::OPT_fno_reroll_loops)) - if (A->getOption().matches(options::OPT_freroll_loops)) - CmdArgs.push_back("-freroll-loops"); - - Args.AddLastArg(CmdArgs, options::OPT_fwritable_strings); - Args.AddLastArg(CmdArgs, options::OPT_funroll_loops, - options::OPT_fno_unroll_loops); - - Args.AddLastArg(CmdArgs, options::OPT_pthread); - - // -stack-protector=0 is default. 
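// [Editor's sketch — annotation, not part of the original patch.] The block
// that follows maps four GCC-style flags onto cc1's numeric -stack-protector
// level. Assuming the usual LangOptions numbering (SSPOff=0, SSPOn=1,
// SSPStrong=2, SSPReq=3 — an assumption; check LangOptions.h), the precedence
// is:

#include <algorithm>

enum class SSPFlag { None, Off, On, Strong, All }; // which flag came last

unsigned sspLevel(SSPFlag last, unsigned tcDefault, bool freestandingDarwin) {
  switch (last) {
  case SSPFlag::All:    return 3;                       // -fstack-protector-all
  case SSPFlag::Strong: return 2;                       // -fstack-protector-strong
  case SSPFlag::On:     return std::max(1u, tcDefault); // at least SSPOn
  case SSPFlag::Off:    return 0;                       // -fno-stack-protector
  case SSPFlag::None:                                   // no flag given:
    return freestandingDarwin ? 0 : tcDefault;          // toolchain default
  }
  return tcDefault;
}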
- unsigned StackProtectorLevel = 0; - if (Arg *A = Args.getLastArg(options::OPT_fno_stack_protector, - options::OPT_fstack_protector_all, - options::OPT_fstack_protector_strong, - options::OPT_fstack_protector)) { - if (A->getOption().matches(options::OPT_fstack_protector)) { - StackProtectorLevel = std::max<unsigned>( - LangOptions::SSPOn, - getToolChain().GetDefaultStackProtectorLevel(KernelOrKext)); - } else if (A->getOption().matches(options::OPT_fstack_protector_strong)) - StackProtectorLevel = LangOptions::SSPStrong; - else if (A->getOption().matches(options::OPT_fstack_protector_all)) - StackProtectorLevel = LangOptions::SSPReq; - } else { - StackProtectorLevel = - getToolChain().GetDefaultStackProtectorLevel(KernelOrKext); - // Only use a default stack protector on Darwin in case -ffreestanding - // is not specified. - if (Triple.isOSDarwin() && !IsHosted) - StackProtectorLevel = 0; - } - if (StackProtectorLevel) { - CmdArgs.push_back("-stack-protector"); - CmdArgs.push_back(Args.MakeArgString(Twine(StackProtectorLevel))); - } - - // --param ssp-buffer-size= - for (const Arg *A : Args.filtered(options::OPT__param)) { - StringRef Str(A->getValue()); - if (Str.startswith("ssp-buffer-size=")) { - if (StackProtectorLevel) { - CmdArgs.push_back("-stack-protector-buffer-size"); - // FIXME: Verify the argument is a valid integer. - CmdArgs.push_back(Args.MakeArgString(Str.drop_front(16))); - } - A->claim(); - } - } - - // Translate -mstackrealign - if (Args.hasFlag(options::OPT_mstackrealign, options::OPT_mno_stackrealign, - false)) - CmdArgs.push_back(Args.MakeArgString("-mstackrealign")); - - if (Args.hasArg(options::OPT_mstack_alignment)) { - StringRef alignment = Args.getLastArgValue(options::OPT_mstack_alignment); - CmdArgs.push_back(Args.MakeArgString("-mstack-alignment=" + alignment)); - } - - if (Args.hasArg(options::OPT_mstack_probe_size)) { - StringRef Size = Args.getLastArgValue(options::OPT_mstack_probe_size); - - if (!Size.empty()) - CmdArgs.push_back(Args.MakeArgString("-mstack-probe-size=" + Size)); - else - CmdArgs.push_back("-mstack-probe-size=0"); - } - - switch (getToolChain().getArch()) { - case llvm::Triple::aarch64: - case llvm::Triple::aarch64_be: - case llvm::Triple::arm: - case llvm::Triple::armeb: - case llvm::Triple::thumb: - case llvm::Triple::thumbeb: - CmdArgs.push_back("-fallow-half-arguments-and-returns"); - break; - - default: - break; - } - - if (Arg *A = Args.getLastArg(options::OPT_mrestrict_it, - options::OPT_mno_restrict_it)) { - if (A->getOption().matches(options::OPT_mrestrict_it)) { - CmdArgs.push_back("-backend-option"); - CmdArgs.push_back("-arm-restrict-it"); - } else { - CmdArgs.push_back("-backend-option"); - CmdArgs.push_back("-arm-no-restrict-it"); - } - } else if (Triple.isOSWindows() && - (Triple.getArch() == llvm::Triple::arm || - Triple.getArch() == llvm::Triple::thumb)) { - // Windows on ARM expects restricted IT blocks - CmdArgs.push_back("-backend-option"); - CmdArgs.push_back("-arm-restrict-it"); - } - - // Forward -cl options to -cc1 - if (Args.getLastArg(options::OPT_cl_opt_disable)) { - CmdArgs.push_back("-cl-opt-disable"); - } - if (Args.getLastArg(options::OPT_cl_strict_aliasing)) { - CmdArgs.push_back("-cl-strict-aliasing"); - } - if (Args.getLastArg(options::OPT_cl_single_precision_constant)) { - CmdArgs.push_back("-cl-single-precision-constant"); - } - if (Args.getLastArg(options::OPT_cl_finite_math_only)) { - CmdArgs.push_back("-cl-finite-math-only"); - } - if (Args.getLastArg(options::OPT_cl_kernel_arg_info)) { -
CmdArgs.push_back("-cl-kernel-arg-info"); - } - if (Args.getLastArg(options::OPT_cl_unsafe_math_optimizations)) { - CmdArgs.push_back("-cl-unsafe-math-optimizations"); - } - if (Args.getLastArg(options::OPT_cl_fast_relaxed_math)) { - CmdArgs.push_back("-cl-fast-relaxed-math"); - } - if (Args.getLastArg(options::OPT_cl_mad_enable)) { - CmdArgs.push_back("-cl-mad-enable"); - } - if (Args.getLastArg(options::OPT_cl_no_signed_zeros)) { - CmdArgs.push_back("-cl-no-signed-zeros"); - } - if (Arg *A = Args.getLastArg(options::OPT_cl_std_EQ)) { - std::string CLStdStr = "-cl-std="; - CLStdStr += A->getValue(); - CmdArgs.push_back(Args.MakeArgString(CLStdStr)); - } - if (Args.getLastArg(options::OPT_cl_denorms_are_zero)) { - CmdArgs.push_back("-cl-denorms-are-zero"); - } - if (Args.getLastArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt)) { - CmdArgs.push_back("-cl-fp32-correctly-rounded-divide-sqrt"); - } - - // Forward -f options with positive and negative forms; we translate - // these by hand. - if (Arg *A = Args.getLastArg(options::OPT_fprofile_sample_use_EQ)) { - StringRef fname = A->getValue(); - if (!llvm::sys::fs::exists(fname)) - D.Diag(diag::err_drv_no_such_file) << fname; - else - A->render(Args, CmdArgs); - } - - // -fbuiltin is default unless -mkernel is used. - bool UseBuiltins = - Args.hasFlag(options::OPT_fbuiltin, options::OPT_fno_builtin, - !Args.hasArg(options::OPT_mkernel)); - if (!UseBuiltins) - CmdArgs.push_back("-fno-builtin"); - - // -ffreestanding implies -fno-builtin. - if (Args.hasArg(options::OPT_ffreestanding)) - UseBuiltins = false; - - // Process the -fno-builtin-* options. - for (const auto &Arg : Args) { - const Option &O = Arg->getOption(); - if (!O.matches(options::OPT_fno_builtin_)) - continue; - - Arg->claim(); - // If -fno-builtin is specified, then there's no need to pass the option to - // the frontend. - if (!UseBuiltins) - continue; - - StringRef FuncName = Arg->getValue(); - CmdArgs.push_back(Args.MakeArgString("-fno-builtin-" + FuncName)); - } - - if (!Args.hasFlag(options::OPT_fassume_sane_operator_new, - options::OPT_fno_assume_sane_operator_new)) - CmdArgs.push_back("-fno-assume-sane-operator-new"); - - // -fblocks=0 is default. - if (Args.hasFlag(options::OPT_fblocks, options::OPT_fno_blocks, - getToolChain().IsBlocksDefault()) || - (Args.hasArg(options::OPT_fgnu_runtime) && - Args.hasArg(options::OPT_fobjc_nonfragile_abi) && - !Args.hasArg(options::OPT_fno_blocks))) { - CmdArgs.push_back("-fblocks"); - - if (!Args.hasArg(options::OPT_fgnu_runtime) && - !getToolChain().hasBlocksRuntime()) - CmdArgs.push_back("-fblocks-runtime-optional"); - } - - if (Args.hasFlag(options::OPT_fcoroutines_ts, options::OPT_fno_coroutines_ts, - false) && - types::isCXX(InputType)) { - CmdArgs.push_back("-fcoroutines-ts"); - } - - // -fmodules enables the use of precompiled modules (off by default). - // Users can pass -fno-cxx-modules to turn off modules support for - // C++/Objective-C++ programs. - bool HaveClangModules = false; - if (Args.hasFlag(options::OPT_fmodules, options::OPT_fno_modules, false)) { - bool AllowedInCXX = Args.hasFlag(options::OPT_fcxx_modules, - options::OPT_fno_cxx_modules, true); - if (AllowedInCXX || !types::isCXX(InputType)) { - CmdArgs.push_back("-fmodules"); - HaveClangModules = true; - } - } - - bool HaveAnyModules = HaveClangModules; - if (Args.hasArg(options::OPT_fmodules_ts)) { - CmdArgs.push_back("-fmodules-ts"); - HaveAnyModules = true; - } - - // -fmodule-maps enables implicit reading of module map files. 
By default, - // this is enabled if we are using Clang's flavor of precompiled modules. - if (Args.hasFlag(options::OPT_fimplicit_module_maps, - options::OPT_fno_implicit_module_maps, HaveClangModules)) { - CmdArgs.push_back("-fimplicit-module-maps"); - } - - // -fmodules-decluse checks that modules used are declared so (off by - // default). - if (Args.hasFlag(options::OPT_fmodules_decluse, - options::OPT_fno_modules_decluse, false)) { - CmdArgs.push_back("-fmodules-decluse"); - } - - // -fmodules-strict-decluse is like -fmodule-decluse, but also checks that - // all #included headers are part of modules. - if (Args.hasFlag(options::OPT_fmodules_strict_decluse, - options::OPT_fno_modules_strict_decluse, false)) { - CmdArgs.push_back("-fmodules-strict-decluse"); - } - - // -fno-implicit-modules turns off implicitly compiling modules on demand. - if (!Args.hasFlag(options::OPT_fimplicit_modules, - options::OPT_fno_implicit_modules, HaveClangModules)) { - if (HaveAnyModules) - CmdArgs.push_back("-fno-implicit-modules"); - } else if (HaveAnyModules) { - // -fmodule-cache-path specifies where our implicitly-built module files - // should be written. - SmallString<128> Path; - if (Arg *A = Args.getLastArg(options::OPT_fmodules_cache_path)) - Path = A->getValue(); - if (C.isForDiagnostics()) { - // When generating crash reports, we want to emit the modules along with - // the reproduction sources, so we ignore any provided module path. - Path = Output.getFilename(); - llvm::sys::path::replace_extension(Path, ".cache"); - llvm::sys::path::append(Path, "modules"); - } else if (Path.empty()) { - // No module path was provided: use the default. - llvm::sys::path::system_temp_directory(/*erasedOnReboot=*/false, Path); - llvm::sys::path::append(Path, "org.llvm.clang."); - appendUserToPath(Path); - llvm::sys::path::append(Path, "ModuleCache"); - } - const char Arg[] = "-fmodules-cache-path="; - Path.insert(Path.begin(), Arg, Arg + strlen(Arg)); - CmdArgs.push_back(Args.MakeArgString(Path)); - } - - if (HaveAnyModules) { - // -fprebuilt-module-path specifies where to load the prebuilt module files. - for (const Arg *A : Args.filtered(options::OPT_fprebuilt_module_path)) - CmdArgs.push_back(Args.MakeArgString( - std::string("-fprebuilt-module-path=") + A->getValue())); - } - - // -fmodule-name specifies the module that is currently being built (or - // used for header checking by -fmodule-maps). - Args.AddLastArg(CmdArgs, options::OPT_fmodule_name_EQ); - - // -fmodule-map-file can be used to specify files containing module - // definitions. - Args.AddAllArgs(CmdArgs, options::OPT_fmodule_map_file); - - // -fbuiltin-module-map can be used to load the clang - // builtin headers modulemap file. - if (Args.hasArg(options::OPT_fbuiltin_module_map)) { - SmallString<128> BuiltinModuleMap(getToolChain().getDriver().ResourceDir); - llvm::sys::path::append(BuiltinModuleMap, "include"); - llvm::sys::path::append(BuiltinModuleMap, "module.modulemap"); - if (llvm::sys::fs::exists(BuiltinModuleMap)) { - CmdArgs.push_back(Args.MakeArgString("-fmodule-map-file=" + - BuiltinModuleMap)); - } - } - - // -fmodule-file can be used to specify files containing precompiled modules. - if (HaveAnyModules) - Args.AddAllArgs(CmdArgs, options::OPT_fmodule_file); - else - Args.ClaimAllArgs(options::OPT_fmodule_file); - - // When building modules and generating crashdumps, we need to dump a module - // dependency VFS alongside the output. 
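// [Editor's sketch — annotation, not part of the original patch.] The module
// cache path chosen a few lines up has three tiers: crash diagnostics keep the
// cache next to the output, an explicit -fmodules-cache-path is honored
// otherwise, and the fallback is a per-user temp directory. Equivalent path
// math with std::filesystem (illustrative; the real code uses llvm::sys::path):

#include <filesystem>
#include <string>

std::filesystem::path moduleCachePath(const std::string &explicitPath,
                                      bool forDiagnostics,
                                      const std::filesystem::path &output,
                                      const std::string &user) {
  namespace fs = std::filesystem;
  if (forDiagnostics) // keep modules next to the crash-report sources
    return fs::path(output).replace_extension(".cache") / "modules";
  if (!explicitPath.empty())
    return explicitPath;
  return fs::temp_directory_path() / ("org.llvm.clang." + user) / "ModuleCache";
}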
- if (HaveClangModules && C.isForDiagnostics()) { - SmallString<128> VFSDir(Output.getFilename()); - llvm::sys::path::replace_extension(VFSDir, ".cache"); - // Add the cache directory as a temp so the crash diagnostics pick it up. - C.addTempFile(Args.MakeArgString(VFSDir)); - - llvm::sys::path::append(VFSDir, "vfs"); - CmdArgs.push_back("-module-dependency-dir"); - CmdArgs.push_back(Args.MakeArgString(VFSDir)); - } - - if (HaveClangModules) - Args.AddLastArg(CmdArgs, options::OPT_fmodules_user_build_path); - - // Pass through all -fmodules-ignore-macro arguments. - Args.AddAllArgs(CmdArgs, options::OPT_fmodules_ignore_macro); - Args.AddLastArg(CmdArgs, options::OPT_fmodules_prune_interval); - Args.AddLastArg(CmdArgs, options::OPT_fmodules_prune_after); - - Args.AddLastArg(CmdArgs, options::OPT_fbuild_session_timestamp); - - if (Arg *A = Args.getLastArg(options::OPT_fbuild_session_file)) { - if (Args.hasArg(options::OPT_fbuild_session_timestamp)) - D.Diag(diag::err_drv_argument_not_allowed_with) - << A->getAsString(Args) << "-fbuild-session-timestamp"; - - llvm::sys::fs::file_status Status; - if (llvm::sys::fs::status(A->getValue(), Status)) - D.Diag(diag::err_drv_no_such_file) << A->getValue(); - CmdArgs.push_back( - Args.MakeArgString("-fbuild-session-timestamp=" + - Twine((uint64_t)Status.getLastModificationTime() - .time_since_epoch() - .count()))); - } - - if (Args.getLastArg(options::OPT_fmodules_validate_once_per_build_session)) { - if (!Args.getLastArg(options::OPT_fbuild_session_timestamp, - options::OPT_fbuild_session_file)) - D.Diag(diag::err_drv_modules_validate_once_requires_timestamp); - - Args.AddLastArg(CmdArgs, - options::OPT_fmodules_validate_once_per_build_session); - } - - Args.AddLastArg(CmdArgs, options::OPT_fmodules_validate_system_headers); - Args.AddLastArg(CmdArgs, options::OPT_fmodules_disable_diagnostic_validation); - - // -faccess-control is default. - if (Args.hasFlag(options::OPT_fno_access_control, - options::OPT_faccess_control, false)) - CmdArgs.push_back("-fno-access-control"); - - // -felide-constructors is the default. - if (Args.hasFlag(options::OPT_fno_elide_constructors, - options::OPT_felide_constructors, false)) - CmdArgs.push_back("-fno-elide-constructors"); - - ToolChain::RTTIMode RTTIMode = getToolChain().getRTTIMode(); - - if (KernelOrKext || (types::isCXX(InputType) && - (RTTIMode == ToolChain::RM_DisabledExplicitly || - RTTIMode == ToolChain::RM_DisabledImplicitly))) - CmdArgs.push_back("-fno-rtti"); - - // -fshort-enums=0 is default for all architectures except Hexagon. - if (Args.hasFlag(options::OPT_fshort_enums, options::OPT_fno_short_enums, - getToolChain().getArch() == llvm::Triple::hexagon)) - CmdArgs.push_back("-fshort-enums"); - - // -fsigned-char is default. - if (Arg *A = Args.getLastArg( - options::OPT_fsigned_char, options::OPT_fno_signed_char, - options::OPT_funsigned_char, options::OPT_fno_unsigned_char)) { - if (A->getOption().matches(options::OPT_funsigned_char) || - A->getOption().matches(options::OPT_fno_signed_char)) { - CmdArgs.push_back("-fno-signed-char"); - } - } else if (!isSignedCharDefault(getToolChain().getTriple())) { - CmdArgs.push_back("-fno-signed-char"); - } - - // -fuse-cxa-atexit is default. 
- if (!Args.hasFlag( - options::OPT_fuse_cxa_atexit, options::OPT_fno_use_cxa_atexit, - !IsWindowsCygnus && !IsWindowsGNU && - getToolChain().getTriple().getOS() != llvm::Triple::Solaris && - getToolChain().getArch() != llvm::Triple::hexagon && - getToolChain().getArch() != llvm::Triple::xcore && - ((getToolChain().getTriple().getVendor() != - llvm::Triple::MipsTechnologies) || - getToolChain().getTriple().hasEnvironment())) || - KernelOrKext) - CmdArgs.push_back("-fno-use-cxa-atexit"); - - // -fms-extensions=0 is default. - if (Args.hasFlag(options::OPT_fms_extensions, options::OPT_fno_ms_extensions, - IsWindowsMSVC)) - CmdArgs.push_back("-fms-extensions"); - - // -fno-use-line-directives is default. - if (Args.hasFlag(options::OPT_fuse_line_directives, - options::OPT_fno_use_line_directives, false)) - CmdArgs.push_back("-fuse-line-directives"); - - // -fms-compatibility=0 is default. - if (Args.hasFlag(options::OPT_fms_compatibility, - options::OPT_fno_ms_compatibility, - (IsWindowsMSVC && - Args.hasFlag(options::OPT_fms_extensions, - options::OPT_fno_ms_extensions, true)))) - CmdArgs.push_back("-fms-compatibility"); - - VersionTuple MSVT = - getToolChain().computeMSVCVersion(&getToolChain().getDriver(), Args); - if (!MSVT.empty()) - CmdArgs.push_back( - Args.MakeArgString("-fms-compatibility-version=" + MSVT.getAsString())); - - bool IsMSVC2015Compatible = MSVT.getMajor() >= 19; - if (ImplyVCPPCXXVer) { - StringRef LanguageStandard; - if (const Arg *StdArg = Args.getLastArg(options::OPT__SLASH_std)) { - LanguageStandard = llvm::StringSwitch<StringRef>(StdArg->getValue()) - .Case("c++14", "-std=c++14") - .Case("c++latest", "-std=c++1z") - .Default(""); - if (LanguageStandard.empty()) - D.Diag(clang::diag::warn_drv_unused_argument) - << StdArg->getAsString(Args); - } - - if (LanguageStandard.empty()) { - if (IsMSVC2015Compatible) - LanguageStandard = "-std=c++14"; - else - LanguageStandard = "-std=c++11"; - } - - CmdArgs.push_back(LanguageStandard.data()); - } - - // -fno-borland-extensions is default. - if (Args.hasFlag(options::OPT_fborland_extensions, - options::OPT_fno_borland_extensions, false)) - CmdArgs.push_back("-fborland-extensions"); - - // -fno-declspec is default, except for PS4. - if (Args.hasFlag(options::OPT_fdeclspec, options::OPT_fno_declspec, - getToolChain().getTriple().isPS4())) - CmdArgs.push_back("-fdeclspec"); - else if (Args.hasArg(options::OPT_fno_declspec)) - CmdArgs.push_back("-fno-declspec"); // Explicitly disabling __declspec. - - // -fthreadsafe-statics is default, except for MSVC compatibility versions less - // than 19. - if (!Args.hasFlag(options::OPT_fthreadsafe_statics, - options::OPT_fno_threadsafe_statics, - !IsWindowsMSVC || IsMSVC2015Compatible)) - CmdArgs.push_back("-fno-threadsafe-statics"); - - // -fno-delayed-template-parsing is default, except for Windows where MSVC STL - // needs it. - if (Args.hasFlag(options::OPT_fdelayed_template_parsing, - options::OPT_fno_delayed_template_parsing, IsWindowsMSVC)) - CmdArgs.push_back("-fdelayed-template-parsing"); - - // -fgnu-keywords default varies depending on language; only pass if - // specified.
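// [Editor's sketch — annotation, not part of the original patch.] The /std
// handling above defaults the C++ standard from the MSVC compatibility
// version: cl 19.x (VS 2015) implies C++14, anything older gets C++11, and an
// explicit /std:c++14 or /std:c++latest wins. Reduced to a function
// (illustrative only; an unrecognized /std value actually warns first, then
// falls back the same way):

#include <string>

std::string msvcDefaultCXXStd(unsigned msvcMajor, const std::string &slashStd) {
  if (slashStd == "c++14")
    return "-std=c++14";
  if (slashStd == "c++latest")
    return "-std=c++1z";
  return msvcMajor >= 19 ? "-std=c++14" : "-std=c++11";
}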
- if (Arg *A = Args.getLastArg(options::OPT_fgnu_keywords, - options::OPT_fno_gnu_keywords)) - A->render(Args, CmdArgs); - - if (Args.hasFlag(options::OPT_fgnu89_inline, options::OPT_fno_gnu89_inline, - false)) - CmdArgs.push_back("-fgnu89-inline"); - - if (Args.hasArg(options::OPT_fno_inline)) - CmdArgs.push_back("-fno-inline"); - - if (Arg *InlineArg = Args.getLastArg(options::OPT_finline_functions, - options::OPT_finline_hint_functions, - options::OPT_fno_inline_functions)) - InlineArg->render(Args, CmdArgs); - - ObjCRuntime objcRuntime = AddObjCRuntimeArgs(Args, CmdArgs, rewriteKind); - - // -fobjc-dispatch-method is only relevant with the non-fragile ABI, and - // legacy is the default. Except for a deployment target of 10.5, the - // NeXT runtime always uses legacy dispatch, and -fno-objc-legacy-dispatch - // is silently ignored. - if (objcRuntime.isNonFragile()) { - if (!Args.hasFlag(options::OPT_fobjc_legacy_dispatch, - options::OPT_fno_objc_legacy_dispatch, - objcRuntime.isLegacyDispatchDefaultForArch( - getToolChain().getArch()))) { - if (getToolChain().UseObjCMixedDispatch()) - CmdArgs.push_back("-fobjc-dispatch-method=mixed"); - else - CmdArgs.push_back("-fobjc-dispatch-method=non-legacy"); - } - } - - // When the Objective-C legacy runtime is in effect on Mac OS X, - // turn on the option to do Array/Dictionary subscripting - // by default. - if (getToolChain().getArch() == llvm::Triple::x86 && - getToolChain().getTriple().isMacOSX() && - !getToolChain().getTriple().isMacOSXVersionLT(10, 7) && - objcRuntime.getKind() == ObjCRuntime::FragileMacOSX && - objcRuntime.isNeXTFamily()) - CmdArgs.push_back("-fobjc-subscripting-legacy-runtime"); - - // -fencode-extended-block-signature=1 is default. - if (getToolChain().IsEncodeExtendedBlockSignatureDefault()) { - CmdArgs.push_back("-fencode-extended-block-signature"); - } - - // Allow -fno-objc-arr to trump -fobjc-arr/-fobjc-arc. - // NOTE: This logic is duplicated in ToolChains.cpp. - bool ARC = isObjCAutoRefCount(Args); - if (ARC) { - getToolChain().CheckObjCARC(); - - CmdArgs.push_back("-fobjc-arc"); - - // FIXME: It seems like this entire block, and several around it should be - // wrapped in isObjC, but for now we just use it here as this is where it - // was being used previously. - if (types::isCXX(InputType) && types::isObjC(InputType)) { - if (getToolChain().GetCXXStdlibType(Args) == ToolChain::CST_Libcxx) - CmdArgs.push_back("-fobjc-arc-cxxlib=libc++"); - else - CmdArgs.push_back("-fobjc-arc-cxxlib=libstdc++"); - } - - // Allow the user to enable full exceptions code emission. - // We default to off for Objective-C, on for Objective-C++. - if (Args.hasFlag(options::OPT_fobjc_arc_exceptions, - options::OPT_fno_objc_arc_exceptions, - /*default*/ types::isCXX(InputType))) - CmdArgs.push_back("-fobjc-arc-exceptions"); - - } - - // -fobjc-infer-related-result-type is the default, except in the Objective-C - // rewriter. - if (rewriteKind != RK_None) - CmdArgs.push_back("-fno-objc-infer-related-result-type"); - - // Pass down -fobjc-weak or -fno-objc-weak if present.
- if (types::isObjC(InputType)) { - auto WeakArg = Args.getLastArg(options::OPT_fobjc_weak, - options::OPT_fno_objc_weak); - if (!WeakArg) { - // nothing to do - } else if (!objcRuntime.allowsWeak()) { - if (WeakArg->getOption().matches(options::OPT_fobjc_weak)) - D.Diag(diag::err_objc_weak_unsupported); - } else { - WeakArg->render(Args, CmdArgs); - } - } - - if (Args.hasFlag(options::OPT_fapplication_extension, - options::OPT_fno_application_extension, false)) - CmdArgs.push_back("-fapplication-extension"); - - // Handle GCC-style exception args. - if (!C.getDriver().IsCLMode()) - addExceptionArgs(Args, InputType, getToolChain(), KernelOrKext, objcRuntime, - CmdArgs); - - if (Args.hasArg(options::OPT_fsjlj_exceptions) || - getToolChain().UseSjLjExceptions(Args)) - CmdArgs.push_back("-fsjlj-exceptions"); - - // C++ "sane" operator new. - if (!Args.hasFlag(options::OPT_fassume_sane_operator_new, - options::OPT_fno_assume_sane_operator_new)) - CmdArgs.push_back("-fno-assume-sane-operator-new"); - - // -fsized-deallocation is off by default, as it is an ABI-breaking change for - // most platforms. - if (Args.hasFlag(options::OPT_fsized_deallocation, - options::OPT_fno_sized_deallocation, false)) - CmdArgs.push_back("-fsized-deallocation"); - - // -faligned-allocation is on by default in C++17 onwards and otherwise off - // by default. - if (Arg *A = Args.getLastArg(options::OPT_faligned_allocation, - options::OPT_fno_aligned_allocation, - options::OPT_faligned_new_EQ)) { - if (A->getOption().matches(options::OPT_fno_aligned_allocation)) - CmdArgs.push_back("-fno-aligned-allocation"); - else - CmdArgs.push_back("-faligned-allocation"); - } - - // The default new alignment can be specified using a dedicated option or via - // a GCC-compatible option that also turns on aligned allocation. - if (Arg *A = Args.getLastArg(options::OPT_fnew_alignment_EQ, - options::OPT_faligned_new_EQ)) - CmdArgs.push_back( - Args.MakeArgString(Twine("-fnew-alignment=") + A->getValue())); - - // -fconstant-cfstrings is default, and may be subject to argument translation - // on Darwin. - if (!Args.hasFlag(options::OPT_fconstant_cfstrings, - options::OPT_fno_constant_cfstrings) || - !Args.hasFlag(options::OPT_mconstant_cfstrings, - options::OPT_mno_constant_cfstrings)) - CmdArgs.push_back("-fno-constant-cfstrings"); - - // -fshort-wchar default varies depending on platform; only - // pass if specified. - if (Arg *A = Args.getLastArg(options::OPT_fshort_wchar, - options::OPT_fno_short_wchar)) - A->render(Args, CmdArgs); - - // -fno-pascal-strings is default, only pass non-default. - if (Args.hasFlag(options::OPT_fpascal_strings, - options::OPT_fno_pascal_strings, false)) - CmdArgs.push_back("-fpascal-strings"); - - // Honor -fpack-struct= and -fpack-struct, if given. Note that - // -fno-pack-struct doesn't apply to -fpack-struct=. 
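// [Editor's sketch — annotation, not part of the original patch.] The
// aligned-allocation block above folds three spellings into one last-wins
// query, with -faligned-new=N both enabling the feature and pinning the
// default alignment for operator new. A reduced model (illustrative):

#include <optional>
#include <string>
#include <vector>

struct AlignedAllocState {
  std::optional<bool> enabled;          // unset: leave cc1's own default
  std::optional<unsigned> newAlignment; // from -faligned-new=N
};

AlignedAllocState parseAlignedAlloc(const std::vector<std::string> &argv) {
  AlignedAllocState s;
  for (const std::string &a : argv) { // later flags override earlier ones
    if (a == "-faligned-allocation")
      s.enabled = true;
    else if (a == "-fno-aligned-allocation")
      s.enabled = false;
    else if (a.rfind("-faligned-new=", 0) == 0) {
      s.enabled = true;
      s.newAlignment = std::stoul(a.substr(14)); // strlen("-faligned-new=")
    }
  }
  return s;
}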
- if (Arg *A = Args.getLastArg(options::OPT_fpack_struct_EQ)) { - std::string PackStructStr = "-fpack-struct="; - PackStructStr += A->getValue(); - CmdArgs.push_back(Args.MakeArgString(PackStructStr)); - } else if (Args.hasFlag(options::OPT_fpack_struct, - options::OPT_fno_pack_struct, false)) { - CmdArgs.push_back("-fpack-struct=1"); - } - - // Handle -fmax-type-align=N and -fno-max-type-align - bool SkipMaxTypeAlign = Args.hasArg(options::OPT_fno_max_type_align); - if (Arg *A = Args.getLastArg(options::OPT_fmax_type_align_EQ)) { - if (!SkipMaxTypeAlign) { - std::string MaxTypeAlignStr = "-fmax-type-align="; - MaxTypeAlignStr += A->getValue(); - CmdArgs.push_back(Args.MakeArgString(MaxTypeAlignStr)); - } - } else if (getToolChain().getTriple().isOSDarwin()) { - if (!SkipMaxTypeAlign) { - std::string MaxTypeAlignStr = "-fmax-type-align=16"; - CmdArgs.push_back(Args.MakeArgString(MaxTypeAlignStr)); - } - } - - // -fcommon is the default unless compiling kernel code or the target says otherwise. - bool NoCommonDefault = - KernelOrKext || isNoCommonDefault(getToolChain().getTriple()); - if (!Args.hasFlag(options::OPT_fcommon, options::OPT_fno_common, - !NoCommonDefault)) - CmdArgs.push_back("-fno-common"); - - // -fsigned-bitfields is default, and clang doesn't yet support - // -funsigned-bitfields. - if (!Args.hasFlag(options::OPT_fsigned_bitfields, - options::OPT_funsigned_bitfields)) - D.Diag(diag::warn_drv_clang_unsupported) - << Args.getLastArg(options::OPT_funsigned_bitfields)->getAsString(Args); - - // -ffor-scope is default, and clang doesn't support -fno-for-scope. - if (!Args.hasFlag(options::OPT_ffor_scope, options::OPT_fno_for_scope)) - D.Diag(diag::err_drv_clang_unsupported) - << Args.getLastArg(options::OPT_fno_for_scope)->getAsString(Args); - - // -finput_charset=UTF-8 is default. Reject others. - if (Arg *inputCharset = Args.getLastArg(options::OPT_finput_charset_EQ)) { - StringRef value = inputCharset->getValue(); - if (!value.equals_lower("utf-8")) - D.Diag(diag::err_drv_invalid_value) << inputCharset->getAsString(Args) - << value; - } - - // -fexec_charset=UTF-8 is default. Reject others. - if (Arg *execCharset = Args.getLastArg(options::OPT_fexec_charset_EQ)) { - StringRef value = execCharset->getValue(); - if (!value.equals_lower("utf-8")) - D.Diag(diag::err_drv_invalid_value) << execCharset->getAsString(Args) - << value; - } - - // -fcaret-diagnostics is default. - if (!Args.hasFlag(options::OPT_fcaret_diagnostics, - options::OPT_fno_caret_diagnostics, true)) - CmdArgs.push_back("-fno-caret-diagnostics"); - - // -fdiagnostics-fixit-info is default, only pass non-default. - if (!Args.hasFlag(options::OPT_fdiagnostics_fixit_info, - options::OPT_fno_diagnostics_fixit_info)) - CmdArgs.push_back("-fno-diagnostics-fixit-info"); - - // Enable -fdiagnostics-show-option by default.
- if (Args.hasFlag(options::OPT_fdiagnostics_show_option, - options::OPT_fno_diagnostics_show_option)) - CmdArgs.push_back("-fdiagnostics-show-option"); - - if (const Arg *A = - Args.getLastArg(options::OPT_fdiagnostics_show_category_EQ)) { - CmdArgs.push_back("-fdiagnostics-show-category"); - CmdArgs.push_back(A->getValue()); - } - - if (Args.hasFlag(options::OPT_fdiagnostics_show_hotness, - options::OPT_fno_diagnostics_show_hotness, false)) - CmdArgs.push_back("-fdiagnostics-show-hotness"); - - if (const Arg *A = Args.getLastArg(options::OPT_fdiagnostics_format_EQ)) { - CmdArgs.push_back("-fdiagnostics-format"); - CmdArgs.push_back(A->getValue()); - } - - if (Arg *A = Args.getLastArg( - options::OPT_fdiagnostics_show_note_include_stack, - options::OPT_fno_diagnostics_show_note_include_stack)) { - if (A->getOption().matches( - options::OPT_fdiagnostics_show_note_include_stack)) - CmdArgs.push_back("-fdiagnostics-show-note-include-stack"); - else - CmdArgs.push_back("-fno-diagnostics-show-note-include-stack"); - } - - // Color diagnostics are parsed by the driver directly from argv - // and later re-parsed to construct this job; claim any possible - // color diagnostic here to avoid warn_drv_unused_argument and - // diagnose bad OPT_fdiagnostics_color_EQ values. - for (Arg *A : Args) { - const Option &O = A->getOption(); - if (!O.matches(options::OPT_fcolor_diagnostics) && - !O.matches(options::OPT_fdiagnostics_color) && - !O.matches(options::OPT_fno_color_diagnostics) && - !O.matches(options::OPT_fno_diagnostics_color) && - !O.matches(options::OPT_fdiagnostics_color_EQ)) - continue; - if (O.matches(options::OPT_fdiagnostics_color_EQ)) { - StringRef Value(A->getValue()); - if (Value != "always" && Value != "never" && Value != "auto") - getToolChain().getDriver().Diag(diag::err_drv_clang_unsupported) - << ("-fdiagnostics-color=" + Value).str(); - } - A->claim(); - } - if (D.getDiags().getDiagnosticOptions().ShowColors) - CmdArgs.push_back("-fcolor-diagnostics"); - - if (Args.hasArg(options::OPT_fansi_escape_codes)) - CmdArgs.push_back("-fansi-escape-codes"); - - if (!Args.hasFlag(options::OPT_fshow_source_location, - options::OPT_fno_show_source_location)) - CmdArgs.push_back("-fno-show-source-location"); - - if (Args.hasArg(options::OPT_fdiagnostics_absolute_paths)) - CmdArgs.push_back("-fdiagnostics-absolute-paths"); - - if (!Args.hasFlag(options::OPT_fshow_column, options::OPT_fno_show_column, - true)) - CmdArgs.push_back("-fno-show-column"); - - if (!Args.hasFlag(options::OPT_fspell_checking, - options::OPT_fno_spell_checking)) - CmdArgs.push_back("-fno-spell-checking"); - - // -fno-asm-blocks is default. - if (Args.hasFlag(options::OPT_fasm_blocks, options::OPT_fno_asm_blocks, - false)) - CmdArgs.push_back("-fasm-blocks"); - - // -fgnu-inline-asm is default. - if (!Args.hasFlag(options::OPT_fgnu_inline_asm, - options::OPT_fno_gnu_inline_asm, true)) - CmdArgs.push_back("-fno-gnu-inline-asm"); - - // Enable vectorization per default according to the optimization level - // selected. For optimization levels that want vectorization we use the alias - // option to simplify the hasFlag logic. - bool EnableVec = shouldEnableVectorizerAtOLevel(Args, false); - OptSpecifier VectorizeAliasOption = - EnableVec ? options::OPT_O_Group : options::OPT_fvectorize; - if (Args.hasFlag(options::OPT_fvectorize, VectorizeAliasOption, - options::OPT_fno_vectorize, EnableVec)) - CmdArgs.push_back("-vectorize-loops"); - - // -fslp-vectorize is enabled based on the optimization level selected. 
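// [Editor's sketch — annotation, not part of the original patch.] The
// vectorizer block above uses an alias-option trick: when the -O level already
// implies vectorization, the "positive" option is widened to the whole -O
// group, so a plain -O2 satisfies hasFlag and only an explicit -fno-vectorize
// turns it off. The effective truth table, ignoring last-wins ordering between
// the two explicit flags for brevity:

bool enableVectorizer(bool oLevelWantsVec, bool sawFVectorize,
                      bool sawFNoVectorize) {
  if (sawFNoVectorize)
    return false;        // explicit off wins
  if (sawFVectorize)
    return true;         // explicit on wins
  return oLevelWantsVec; // otherwise follow the optimization level
}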
- bool EnableSLPVec = shouldEnableVectorizerAtOLevel(Args, true); - OptSpecifier SLPVectAliasOption = - EnableSLPVec ? options::OPT_O_Group : options::OPT_fslp_vectorize; - if (Args.hasFlag(options::OPT_fslp_vectorize, SLPVectAliasOption, - options::OPT_fno_slp_vectorize, EnableSLPVec)) - CmdArgs.push_back("-vectorize-slp"); - - // -fno-slp-vectorize-aggressive is default. - if (Args.hasFlag(options::OPT_fslp_vectorize_aggressive, - options::OPT_fno_slp_vectorize_aggressive, false)) - CmdArgs.push_back("-vectorize-slp-aggressive"); - - if (Arg *A = Args.getLastArg(options::OPT_fshow_overloads_EQ)) - A->render(Args, CmdArgs); - - if (Arg *A = Args.getLastArg( - options::OPT_fsanitize_undefined_strip_path_components_EQ)) - A->render(Args, CmdArgs); - - // -fdollars-in-identifiers default varies depending on platform and - // language; only pass if specified. - if (Arg *A = Args.getLastArg(options::OPT_fdollars_in_identifiers, - options::OPT_fno_dollars_in_identifiers)) { - if (A->getOption().matches(options::OPT_fdollars_in_identifiers)) - CmdArgs.push_back("-fdollars-in-identifiers"); - else - CmdArgs.push_back("-fno-dollars-in-identifiers"); - } - - // -funit-at-a-time is default, and we don't support -fno-unit-at-a-time for - // practical purposes. - if (Arg *A = Args.getLastArg(options::OPT_funit_at_a_time, - options::OPT_fno_unit_at_a_time)) { - if (A->getOption().matches(options::OPT_fno_unit_at_a_time)) - D.Diag(diag::warn_drv_clang_unsupported) << A->getAsString(Args); - } - - if (Args.hasFlag(options::OPT_fapple_pragma_pack, - options::OPT_fno_apple_pragma_pack, false)) - CmdArgs.push_back("-fapple-pragma-pack"); - - // le32-specific flags: - // -fno-math-builtin: clang should not convert math builtins to intrinsics - // by default. - if (getToolChain().getArch() == llvm::Triple::le32) { - CmdArgs.push_back("-fno-math-builtin"); - } - - if (Args.hasFlag(options::OPT_fsave_optimization_record, - options::OPT_fno_save_optimization_record, false)) { - CmdArgs.push_back("-opt-record-file"); - - const Arg *A = Args.getLastArg(options::OPT_foptimization_record_file_EQ); - if (A) { - CmdArgs.push_back(A->getValue()); - } else { - SmallString<128> F; - if (Output.isFilename() && (Args.hasArg(options::OPT_c) || - Args.hasArg(options::OPT_S))) { - F = Output.getFilename(); - } else { - // Use the input filename. - F = llvm::sys::path::stem(Input.getBaseInput()); - - // If we're compiling for an offload architecture (i.e. a CUDA device), - // we need to make the file name for the device compilation different - // from the host compilation. - if (!JA.isDeviceOffloading(Action::OFK_None) && - !JA.isDeviceOffloading(Action::OFK_Host)) { - llvm::sys::path::replace_extension(F, ""); - F += Action::GetOffloadingFileNamePrefix(JA.getOffloadingDeviceKind(), - Triple.normalize()); - F += "-"; - F += JA.getOffloadingArch(); - } - } - - llvm::sys::path::replace_extension(F, "opt.yaml"); - CmdArgs.push_back(Args.MakeArgString(F)); - } - } - -// Default to -fno-builtin-str{cat,cpy} on Darwin for ARM. -// -// FIXME: Now that PR4941 has been fixed this can be enabled. 
-#if 0 - if (getToolChain().getTriple().isOSDarwin() && - (getToolChain().getArch() == llvm::Triple::arm || - getToolChain().getArch() == llvm::Triple::thumb)) { - if (!Args.hasArg(options::OPT_fbuiltin_strcat)) - CmdArgs.push_back("-fno-builtin-strcat"); - if (!Args.hasArg(options::OPT_fbuiltin_strcpy)) - CmdArgs.push_back("-fno-builtin-strcpy"); - } -#endif - - // Enable rewrite includes if the user's asked for it or if we're generating - // diagnostics. - // TODO: Once -module-dependency-dir works with -frewrite-includes it'd be - // nice to enable this when doing a crashdump for modules as well. - if (Args.hasFlag(options::OPT_frewrite_includes, - options::OPT_fno_rewrite_includes, false) || - (C.isForDiagnostics() && !HaveAnyModules)) - CmdArgs.push_back("-frewrite-includes"); - - // Only allow -traditional or -traditional-cpp in preprocessing modes. - if (Arg *A = Args.getLastArg(options::OPT_traditional, - options::OPT_traditional_cpp)) { - if (isa<PreprocessJobAction>(JA)) - CmdArgs.push_back("-traditional-cpp"); - else - D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args); - } - - Args.AddLastArg(CmdArgs, options::OPT_dM); - Args.AddLastArg(CmdArgs, options::OPT_dD); - - // Handle serialized diagnostics. - if (Arg *A = Args.getLastArg(options::OPT__serialize_diags)) { - CmdArgs.push_back("-serialize-diagnostic-file"); - CmdArgs.push_back(Args.MakeArgString(A->getValue())); - } - - if (Args.hasArg(options::OPT_fretain_comments_from_system_headers)) - CmdArgs.push_back("-fretain-comments-from-system-headers"); - - // Forward -fcomment-block-commands to -cc1. - Args.AddAllArgs(CmdArgs, options::OPT_fcomment_block_commands); - // Forward -fparse-all-comments to -cc1. - Args.AddAllArgs(CmdArgs, options::OPT_fparse_all_comments); - - // Turn -fplugin=name.so into -load name.so - for (const Arg *A : Args.filtered(options::OPT_fplugin_EQ)) { - CmdArgs.push_back("-load"); - CmdArgs.push_back(A->getValue()); - A->claim(); - } - - // Set up statistics file output. - if (const Arg *A = Args.getLastArg(options::OPT_save_stats_EQ)) { - StringRef SaveStats = A->getValue(); - - SmallString<128> StatsFile; - bool DoSaveStats = false; - if (SaveStats == "obj") { - if (Output.isFilename()) { - StatsFile.assign(Output.getFilename()); - llvm::sys::path::remove_filename(StatsFile); - } - DoSaveStats = true; - } else if (SaveStats == "cwd") { - DoSaveStats = true; - } else { - D.Diag(diag::err_drv_invalid_value) << A->getAsString(Args) << SaveStats; - } - - if (DoSaveStats) { - StringRef BaseName = llvm::sys::path::filename(Input.getBaseInput()); - llvm::sys::path::append(StatsFile, BaseName); - llvm::sys::path::replace_extension(StatsFile, "stats"); - CmdArgs.push_back(Args.MakeArgString(Twine("-stats-file=") + - StatsFile)); - } - } - - // Forward -Xclang arguments to -cc1, and -mllvm arguments to the LLVM option - // parser. - Args.AddAllArgValues(CmdArgs, options::OPT_Xclang); - for (const Arg *A : Args.filtered(options::OPT_mllvm)) { - A->claim(); - - // We translate this by hand to the -cc1 argument, since nightly test uses - // it and developers have been trained to spell it with -mllvm. - if (StringRef(A->getValue(0)) == "-disable-llvm-optzns") { - CmdArgs.push_back("-disable-llvm-optzns"); - } else - A->render(Args, CmdArgs); - } - - // With -save-temps, we want to save the unoptimized bitcode output from the - // CompileJobAction, so use -disable-llvm-passes to get pristine IR generated - // by the frontend.
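// [Editor's sketch — annotation, not part of the original patch.] The
// -save-stats handling above builds the stats file name from the input's base
// name and drops it either next to the output ("obj") or in the current
// directory ("cwd"). Equivalent path math with std::filesystem (illustrative):

#include <filesystem>
#include <string>

std::filesystem::path statsFilePath(const std::string &mode, // "obj" or "cwd"
                                    const std::filesystem::path &output,
                                    const std::filesystem::path &input) {
  std::filesystem::path dir;
  if (mode == "obj")
    dir = output.parent_path(); // alongside the object file
  return (dir / input.filename()).replace_extension("stats");
}

// statsFilePath("obj", "build/foo.o", "src/foo.c") == "build/foo.stats"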
- // When -fembed-bitcode is enabled, optimized bitcode is emitted because it - // has a slightly different breakdown between stages. - // FIXME: -fembed-bitcode -save-temps will save optimized bitcode instead of - // pristine IR generated by the frontend. Ideally, a new compile action should - // be added so both IR can be captured. - if (C.getDriver().isSaveTempsEnabled() && - !C.getDriver().embedBitcodeInObject() && isa<CompileJobAction>(JA)) - CmdArgs.push_back("-disable-llvm-passes"); - - if (Output.getType() == types::TY_Dependencies) { - // Handled with other dependency code. - } else if (Output.isFilename()) { - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - } else { - assert(Output.isNothing() && "Invalid output."); - } - - addDashXForInput(Args, Input, CmdArgs); - - if (Input.isFilename()) - CmdArgs.push_back(Input.getFilename()); - else - Input.getInputArg().renderAsInput(Args, CmdArgs); - - Args.AddAllArgs(CmdArgs, options::OPT_undef); - - const char *Exec = getToolChain().getDriver().getClangProgramPath(); - - // Optionally embed the -cc1 level arguments into the debug info, for build - // analysis. - if (getToolChain().UseDwarfDebugFlags()) { - ArgStringList OriginalArgs; - for (const auto &Arg : Args) - Arg->render(Args, OriginalArgs); - - SmallString<256> Flags; - Flags += Exec; - for (const char *OriginalArg : OriginalArgs) { - SmallString<128> EscapedArg; - EscapeSpacesAndBackslashes(OriginalArg, EscapedArg); - Flags += " "; - Flags += EscapedArg; - } - CmdArgs.push_back("-dwarf-debug-flags"); - CmdArgs.push_back(Args.MakeArgString(Flags)); - } - - // Add the split debug info name to the command lines here so we - // can propagate it to the backend. - bool SplitDwarf = SplitDwarfArg && getToolChain().getTriple().isOSLinux() && - (isa<AssembleJobAction>(JA) || isa<CompileJobAction>(JA) || - isa<BackendJobAction>(JA)); - const char *SplitDwarfOut; - if (SplitDwarf) { - CmdArgs.push_back("-split-dwarf-file"); - SplitDwarfOut = SplitDebugName(Args, Input); - CmdArgs.push_back(SplitDwarfOut); - } - - // Host-side CUDA compilation receives device-side outputs as Inputs[1...]. - // Include them with -fcuda-include-gpubinary. - if (IsCuda && Inputs.size() > 1) - for (auto I = std::next(Inputs.begin()), E = Inputs.end(); I != E; ++I) { - CmdArgs.push_back("-fcuda-include-gpubinary"); - CmdArgs.push_back(I->getFilename()); - } - - // OpenMP offloading device jobs take the argument -fopenmp-host-ir-file-path - // to specify the result of the compile phase on the host, so the meaningful - // device declarations can be identified. Also, -fopenmp-is-device is passed - // along to tell the frontend that it is generating code for a device, so that - // only the relevant declarations are emitted. - if (IsOpenMPDevice && Inputs.size() == 2) { - CmdArgs.push_back("-fopenmp-is-device"); - CmdArgs.push_back("-fopenmp-host-ir-file-path"); - CmdArgs.push_back(Args.MakeArgString(Inputs.back().getFilename())); - } - - // For all the host OpenMP offloading compile jobs we need to pass the targets - // information using the -fopenmp-targets= option. - if (isa<CompileJobAction>(JA) && JA.isHostOffloading(Action::OFK_OpenMP)) { - SmallString<128> TargetInfo("-fopenmp-targets="); - - Arg *Tgts = Args.getLastArg(options::OPT_fopenmp_targets_EQ); - assert(Tgts && Tgts->getNumValues() && - "OpenMP offloading has to have targets specified."); - for (unsigned i = 0; i < Tgts->getNumValues(); ++i) { - if (i) - TargetInfo += ','; - // We need to get the string from the triple because it may not be exactly - // the same as the one we get directly from the arguments.
- llvm::Triple T(Tgts->getValue(i)); - TargetInfo += T.getTriple(); - } - CmdArgs.push_back(Args.MakeArgString(TargetInfo.str())); - } - - bool WholeProgramVTables = - Args.hasFlag(options::OPT_fwhole_program_vtables, - options::OPT_fno_whole_program_vtables, false); - if (WholeProgramVTables) { - if (!D.isUsingLTO()) - D.Diag(diag::err_drv_argument_only_allowed_with) - << "-fwhole-program-vtables" - << "-flto"; - CmdArgs.push_back("-fwhole-program-vtables"); - } - - // Finally add the compile command to the compilation. - if (Args.hasArg(options::OPT__SLASH_fallback) && - Output.getType() == types::TY_Object && - (InputType == types::TY_C || InputType == types::TY_CXX)) { - auto CLCommand = - getCLFallback()->GetCommand(C, JA, Output, Inputs, Args, LinkingOutput); - C.addCommand(llvm::make_unique<FallbackCommand>( - JA, *this, Exec, CmdArgs, Inputs, std::move(CLCommand))); - } else if (Args.hasArg(options::OPT__SLASH_fallback) && - isa<PrecompileJobAction>(JA)) { - // In /fallback builds, run the main compilation even if the PCH generation - // fails, so that the main compilation's fallback to cl.exe runs. - C.addCommand(llvm::make_unique<ForceSuccessCommand>(JA, *this, Exec, - CmdArgs, Inputs)); - } else { - C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs)); - } - - // Handle the debug info splitting at object creation time if we're - // creating an object. - // TODO: Currently only works on Linux with newer objcopy. - if (SplitDwarf && Output.getType() == types::TY_Object) - SplitDebugInfo(getToolChain(), C, *this, JA, Args, Output, SplitDwarfOut); - - if (Arg *A = Args.getLastArg(options::OPT_pg)) - if (Args.hasArg(options::OPT_fomit_frame_pointer)) - D.Diag(diag::err_drv_argument_not_allowed_with) << "-fomit-frame-pointer" - << A->getAsString(Args); - - // Claim some arguments which clang supports automatically. - - // -fpch-preprocess is used with GCC to add a special marker in the output to - // include the PCH file. Clang's PTH solution is completely transparent, so we - // do not need to deal with it at all. - Args.ClaimAllArgs(options::OPT_fpch_preprocess); - - // Claim some arguments which clang doesn't support, but we don't - // care to warn the user about. - Args.ClaimAllArgs(options::OPT_clang_ignored_f_Group); - Args.ClaimAllArgs(options::OPT_clang_ignored_m_Group); - - // Disable warnings for clang -E -emit-llvm foo.c - Args.ClaimAllArgs(options::OPT_emit_llvm); -} - -/// Add options related to the Objective-C runtime/ABI. -/// -/// Returns the Objective-C runtime that was selected. -ObjCRuntime Clang::AddObjCRuntimeArgs(const ArgList &args, - ArgStringList &cmdArgs, - RewriteKind rewriteKind) const { - // Look for the controlling runtime option. - Arg *runtimeArg = - args.getLastArg(options::OPT_fnext_runtime, options::OPT_fgnu_runtime, - options::OPT_fobjc_runtime_EQ); - - // Just forward -fobjc-runtime= to the frontend. This supersedes - // options about fragility. - if (runtimeArg && - runtimeArg->getOption().matches(options::OPT_fobjc_runtime_EQ)) { - ObjCRuntime runtime; - StringRef value = runtimeArg->getValue(); - if (runtime.tryParse(value)) { - getToolChain().getDriver().Diag(diag::err_drv_unknown_objc_runtime) - << value; - } - - runtimeArg->render(args, cmdArgs); - return runtime; - } - - // Otherwise, we'll need the ABI "version". Version numbers are - // slightly confusing for historical reasons: - // 1 - Traditional "fragile" ABI - // 2 - Non-fragile ABI, version 1 - // 3 - Non-fragile ABI, version 2 - unsigned objcABIVersion = 1; - // If -fobjc-abi-version= is present, use that to set the version.
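// [Editor's sketch — annotation, not part of the original patch.] The ABI
// numbering introduced above is the confusing part of this function:
//
//   objcABIVersion 1 -> traditional "fragile" ABI
//   objcABIVersion 2 -> non-fragile ABI, version 1
//   objcABIVersion 3 -> non-fragile ABI, version 2
//
// hence the re-basing objcABIVersion = 1 + nonFragileABIVersion further down,
// and the only property the rest of the code consumes is:

inline bool isNonFragileObjCABI(unsigned objcABIVersion) {
  return objcABIVersion != 1;
}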
- if (Arg *abiArg = args.getLastArg(options::OPT_fobjc_abi_version_EQ)) { - StringRef value = abiArg->getValue(); - if (value == "1") - objcABIVersion = 1; - else if (value == "2") - objcABIVersion = 2; - else if (value == "3") - objcABIVersion = 3; - else - getToolChain().getDriver().Diag(diag::err_drv_clang_unsupported) << value; - } else { - // Otherwise, determine if we are using the non-fragile ABI. - bool nonFragileABIIsDefault = - (rewriteKind == RK_NonFragile || - (rewriteKind == RK_None && - getToolChain().IsObjCNonFragileABIDefault())); - if (args.hasFlag(options::OPT_fobjc_nonfragile_abi, - options::OPT_fno_objc_nonfragile_abi, - nonFragileABIIsDefault)) { -// Determine the non-fragile ABI version to use. -#ifdef DISABLE_DEFAULT_NONFRAGILEABI_TWO - unsigned nonFragileABIVersion = 1; -#else - unsigned nonFragileABIVersion = 2; -#endif - - if (Arg *abiArg = - args.getLastArg(options::OPT_fobjc_nonfragile_abi_version_EQ)) { - StringRef value = abiArg->getValue(); - if (value == "1") - nonFragileABIVersion = 1; - else if (value == "2") - nonFragileABIVersion = 2; - else - getToolChain().getDriver().Diag(diag::err_drv_clang_unsupported) - << value; - } - - objcABIVersion = 1 + nonFragileABIVersion; - } else { - objcABIVersion = 1; - } - } - - // We don't actually care about the ABI version other than whether - // it's non-fragile. - bool isNonFragile = objcABIVersion != 1; - - // If we have no runtime argument, ask the toolchain for its default runtime. - // However, the rewriter only really supports the Mac runtime, so assume that. - ObjCRuntime runtime; - if (!runtimeArg) { - switch (rewriteKind) { - case RK_None: - runtime = getToolChain().getDefaultObjCRuntime(isNonFragile); - break; - case RK_Fragile: - runtime = ObjCRuntime(ObjCRuntime::FragileMacOSX, VersionTuple()); - break; - case RK_NonFragile: - runtime = ObjCRuntime(ObjCRuntime::MacOSX, VersionTuple()); - break; - } - - // -fnext-runtime - } else if (runtimeArg->getOption().matches(options::OPT_fnext_runtime)) { - // On Darwin, make this use the default behavior for the toolchain. - if (getToolChain().getTriple().isOSDarwin()) { - runtime = getToolChain().getDefaultObjCRuntime(isNonFragile); - - // Otherwise, build for a generic macosx port. - } else { - runtime = ObjCRuntime(ObjCRuntime::MacOSX, VersionTuple()); - } - - // -fgnu-runtime - } else { - assert(runtimeArg->getOption().matches(options::OPT_fgnu_runtime)); - // Legacy behaviour is to target the gnustep runtime if we are in - // non-fragile mode or the GCC runtime in fragile mode. - if (isNonFragile) - runtime = ObjCRuntime(ObjCRuntime::GNUstep, VersionTuple(1, 6)); - else - runtime = ObjCRuntime(ObjCRuntime::GCC, VersionTuple()); - } - - cmdArgs.push_back( - args.MakeArgString("-fobjc-runtime=" + runtime.getAsString())); - return runtime; -} - -static bool maybeConsumeDash(const std::string &EH, size_t &I) { - bool HaveDash = (I + 1 < EH.size() && EH[I + 1] == '-'); - I += HaveDash; - return !HaveDash; -} - -namespace { -struct EHFlags { - bool Synch = false; - bool Asynch = false; - bool NoUnwindC = false; -}; -} // end anonymous namespace - -/// /EH controls whether to run destructor cleanups when exceptions are -/// thrown. There are three modifiers: -/// - s: Cleanup after "synchronous" exceptions, aka C++ exceptions. -/// - a: Cleanup after "asynchronous" exceptions, aka structured exceptions. -/// The 'a' modifier is unimplemented and fundamentally hard in LLVM IR. -/// - c: Assume that extern "C" functions are implicitly nounwind. 
-/// The default is /EHs-c-, meaning cleanups are disabled. -static EHFlags parseClangCLEHFlags(const Driver &D, const ArgList &Args) { - EHFlags EH; - - std::vector<std::string> EHArgs = - Args.getAllArgValues(options::OPT__SLASH_EH); - for (auto EHVal : EHArgs) { - for (size_t I = 0, E = EHVal.size(); I != E; ++I) { - switch (EHVal[I]) { - case 'a': - EH.Asynch = maybeConsumeDash(EHVal, I); - if (EH.Asynch) - EH.Synch = false; - continue; - case 'c': - EH.NoUnwindC = maybeConsumeDash(EHVal, I); - continue; - case 's': - EH.Synch = maybeConsumeDash(EHVal, I); - if (EH.Synch) - EH.Asynch = false; - continue; - default: - break; - } - D.Diag(clang::diag::err_drv_invalid_value) << "/EH" << EHVal; - break; - } - } - // The /GX, /GX- flags are only processed if there are no /EH flags. - // The default is that /GX is not specified. - if (EHArgs.empty() && - Args.hasFlag(options::OPT__SLASH_GX, options::OPT__SLASH_GX_, - /*default=*/false)) { - EH.Synch = true; - EH.NoUnwindC = true; - } - - return EH; -} - -void Clang::AddClangCLArgs(const ArgList &Args, types::ID InputType, - ArgStringList &CmdArgs, - codegenoptions::DebugInfoKind *DebugInfoKind, - bool *EmitCodeView) const { - unsigned RTOptionID = options::OPT__SLASH_MT; - - if (Args.hasArg(options::OPT__SLASH_LDd)) - // The /LDd option implies /MTd. The dependent lib part can be overridden, - // but defining _DEBUG is sticky. - RTOptionID = options::OPT__SLASH_MTd; - - if (Arg *A = Args.getLastArg(options::OPT__SLASH_M_Group)) - RTOptionID = A->getOption().getID(); - - StringRef FlagForCRT; - switch (RTOptionID) { - case options::OPT__SLASH_MD: - if (Args.hasArg(options::OPT__SLASH_LDd)) - CmdArgs.push_back("-D_DEBUG"); - CmdArgs.push_back("-D_MT"); - CmdArgs.push_back("-D_DLL"); - FlagForCRT = "--dependent-lib=msvcrt"; - break; - case options::OPT__SLASH_MDd: - CmdArgs.push_back("-D_DEBUG"); - CmdArgs.push_back("-D_MT"); - CmdArgs.push_back("-D_DLL"); - FlagForCRT = "--dependent-lib=msvcrtd"; - break; - case options::OPT__SLASH_MT: - if (Args.hasArg(options::OPT__SLASH_LDd)) - CmdArgs.push_back("-D_DEBUG"); - CmdArgs.push_back("-D_MT"); - CmdArgs.push_back("-flto-visibility-public-std"); - FlagForCRT = "--dependent-lib=libcmt"; - break; - case options::OPT__SLASH_MTd: - CmdArgs.push_back("-D_DEBUG"); - CmdArgs.push_back("-D_MT"); - CmdArgs.push_back("-flto-visibility-public-std"); - FlagForCRT = "--dependent-lib=libcmtd"; - break; - default: - llvm_unreachable("Unexpected option ID."); - } - - if (Args.hasArg(options::OPT__SLASH_Zl)) { - CmdArgs.push_back("-D_VC_NODEFAULTLIB"); - } else { - CmdArgs.push_back(FlagForCRT.data()); - - // This provides POSIX compatibility (maps 'open' to '_open'), which most - // users want. The /Za flag to cl.exe turns this off, but it's not - // implemented in clang. - CmdArgs.push_back("--dependent-lib=oldnames"); - } - - // Both /showIncludes and /E (and /EP) write to stdout. Allowing both - // would produce interleaved output, so ignore /showIncludes in such cases. - if (!Args.hasArg(options::OPT_E) && !Args.hasArg(options::OPT__SLASH_EP)) - if (Arg *A = Args.getLastArg(options::OPT_show_includes)) - A->render(Args, CmdArgs); - - // This controls whether or not we emit RTTI data for polymorphic types. - if (Args.hasFlag(options::OPT__SLASH_GR_, options::OPT__SLASH_GR, - /*default=*/false)) - CmdArgs.push_back("-fno-rtti-data"); - - // This controls whether or not we emit stack-protector instrumentation. - // In MSVC, Buffer Security Check (/GS) is on by default.
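// [Editor's sketch — annotation, not part of the original patch.] The /EH
// scanner in parseClangCLEHFlags above walks the value character by character,
// and a '-' negates the modifier it follows (that is all maybeConsumeDash
// does). Matching the code above, not new behavior:
//
//   /EHsc   -> Synch = true,  NoUnwindC = true
//   /EHs-c- -> Synch = false, NoUnwindC = false   (the documented default)
//   /EHa    -> Asynch = true, Synch = false
//
// The dash rule in isolation:

#include <cstddef>
#include <string>

bool consumedDash(const std::string &s, std::size_t i) {
  return i + 1 < s.size() && s[i + 1] == '-'; // true => modifier is negated
}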
- if (Args.hasFlag(options::OPT__SLASH_GS, options::OPT__SLASH_GS_, - /*default=*/true)) { - CmdArgs.push_back("-stack-protector"); - CmdArgs.push_back(Args.MakeArgString(Twine(LangOptions::SSPStrong))); - } - - // Emit CodeView if -Z7, -Zd, or -gline-tables-only are present. - if (Arg *DebugInfoArg = - Args.getLastArg(options::OPT__SLASH_Z7, options::OPT__SLASH_Zd, - options::OPT_gline_tables_only)) { - *EmitCodeView = true; - if (DebugInfoArg->getOption().matches(options::OPT__SLASH_Z7)) - *DebugInfoKind = codegenoptions::LimitedDebugInfo; - else - *DebugInfoKind = codegenoptions::DebugLineTablesOnly; - CmdArgs.push_back("-gcodeview"); - } else { - *EmitCodeView = false; - } - - const Driver &D = getToolChain().getDriver(); - EHFlags EH = parseClangCLEHFlags(D, Args); - if (EH.Synch || EH.Asynch) { - if (types::isCXX(InputType)) - CmdArgs.push_back("-fcxx-exceptions"); - CmdArgs.push_back("-fexceptions"); - } - if (types::isCXX(InputType) && EH.Synch && EH.NoUnwindC) - CmdArgs.push_back("-fexternc-nounwind"); - - // /EP should expand to -E -P. - if (Args.hasArg(options::OPT__SLASH_EP)) { - CmdArgs.push_back("-E"); - CmdArgs.push_back("-P"); - } - - unsigned VolatileOptionID; - if (getToolChain().getArch() == llvm::Triple::x86_64 || - getToolChain().getArch() == llvm::Triple::x86) - VolatileOptionID = options::OPT__SLASH_volatile_ms; - else - VolatileOptionID = options::OPT__SLASH_volatile_iso; - - if (Arg *A = Args.getLastArg(options::OPT__SLASH_volatile_Group)) - VolatileOptionID = A->getOption().getID(); - - if (VolatileOptionID == options::OPT__SLASH_volatile_ms) - CmdArgs.push_back("-fms-volatile"); - - Arg *MostGeneralArg = Args.getLastArg(options::OPT__SLASH_vmg); - Arg *BestCaseArg = Args.getLastArg(options::OPT__SLASH_vmb); - if (MostGeneralArg && BestCaseArg) - D.Diag(clang::diag::err_drv_argument_not_allowed_with) - << MostGeneralArg->getAsString(Args) << BestCaseArg->getAsString(Args); - - if (MostGeneralArg) { - Arg *SingleArg = Args.getLastArg(options::OPT__SLASH_vms); - Arg *MultipleArg = Args.getLastArg(options::OPT__SLASH_vmm); - Arg *VirtualArg = Args.getLastArg(options::OPT__SLASH_vmv); - - Arg *FirstConflict = SingleArg ? SingleArg : MultipleArg; - Arg *SecondConflict = VirtualArg ? 
VirtualArg : MultipleArg; - if (FirstConflict && SecondConflict && FirstConflict != SecondConflict) - D.Diag(clang::diag::err_drv_argument_not_allowed_with) - << FirstConflict->getAsString(Args) - << SecondConflict->getAsString(Args); - - if (SingleArg) - CmdArgs.push_back("-fms-memptr-rep=single"); - else if (MultipleArg) - CmdArgs.push_back("-fms-memptr-rep=multiple"); - else - CmdArgs.push_back("-fms-memptr-rep=virtual"); - } - - if (Args.getLastArg(options::OPT__SLASH_Gd)) - CmdArgs.push_back("-fdefault-calling-conv=cdecl"); - else if (Args.getLastArg(options::OPT__SLASH_Gr)) - CmdArgs.push_back("-fdefault-calling-conv=fastcall"); - else if (Args.getLastArg(options::OPT__SLASH_Gz)) - CmdArgs.push_back("-fdefault-calling-conv=stdcall"); - else if (Args.getLastArg(options::OPT__SLASH_Gv)) - CmdArgs.push_back("-fdefault-calling-conv=vectorcall"); - - if (Arg *A = Args.getLastArg(options::OPT_vtordisp_mode_EQ)) - A->render(Args, CmdArgs); - - if (!Args.hasArg(options::OPT_fdiagnostics_format_EQ)) { - CmdArgs.push_back("-fdiagnostics-format"); - if (Args.hasArg(options::OPT__SLASH_fallback)) - CmdArgs.push_back("msvc-fallback"); - else - CmdArgs.push_back("msvc"); - } -} - -visualstudio::Compiler *Clang::getCLFallback() const { - if (!CLFallback) - CLFallback.reset(new visualstudio::Compiler(getToolChain())); - return CLFallback.get(); -} - -void ClangAs::AddMIPSTargetArgs(const ArgList &Args, - ArgStringList &CmdArgs) const { - StringRef CPUName; - StringRef ABIName; - const llvm::Triple &Triple = getToolChain().getTriple(); - mips::getMipsCPUAndABI(Args, Triple, CPUName, ABIName); - - CmdArgs.push_back("-target-abi"); - CmdArgs.push_back(ABIName.data()); -} - -void ClangAs::AddX86TargetArgs(const ArgList &Args, - ArgStringList &CmdArgs) const { - if (Arg *A = Args.getLastArg(options::OPT_masm_EQ)) { - StringRef Value = A->getValue(); - if (Value == "intel" || Value == "att") { - CmdArgs.push_back("-mllvm"); - CmdArgs.push_back(Args.MakeArgString("-x86-asm-syntax=" + Value)); - } else { - getToolChain().getDriver().Diag(diag::err_drv_unsupported_option_argument) - << A->getOption().getName() << Value; - } - } -} - -void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - ArgStringList CmdArgs; - - assert(Inputs.size() == 1 && "Unexpected number of inputs."); - const InputInfo &Input = Inputs[0]; - - const llvm::Triple &Triple = getToolChain().getEffectiveTriple(); - const std::string &TripleStr = Triple.getTriple(); - - // Don't warn about "clang -w -c foo.s" - Args.ClaimAllArgs(options::OPT_w); - // and "clang -emit-llvm -c foo.s" - Args.ClaimAllArgs(options::OPT_emit_llvm); - - claimNoWarnArgs(Args); - - // Invoke ourselves in -cc1as mode. - // - // FIXME: Implement custom jobs for internal actions. - CmdArgs.push_back("-cc1as"); - - // Add the "effective" target triple. - CmdArgs.push_back("-triple"); - CmdArgs.push_back(Args.MakeArgString(TripleStr)); - - // Set the output mode, we currently only expect to be used as a real - // assembler. - CmdArgs.push_back("-filetype"); - CmdArgs.push_back("obj"); - - // Set the main file name, so that debug info works even with - // -save-temps or preprocessed assembly. 
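Taken together, the flags assembled above and below bottom out in a -cc1as invocation of roughly this shape (triple, CPU, and file names are illustrative):

  clang -cc1as -triple x86_64-unknown-linux-gnu -filetype obj \
      -main-file-name foo.s -target-cpu x86-64 -o foo.o foo.s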
- CmdArgs.push_back("-main-file-name"); - CmdArgs.push_back(Clang::getBaseInputName(Args, Input)); - - // Add the target cpu - std::string CPU = getCPUName(Args, Triple, /*FromAs*/ true); - if (!CPU.empty()) { - CmdArgs.push_back("-target-cpu"); - CmdArgs.push_back(Args.MakeArgString(CPU)); - } - - // Add the target features - getTargetFeatures(getToolChain(), Triple, Args, CmdArgs, true); - - // Ignore explicit -force_cpusubtype_ALL option. - (void)Args.hasArg(options::OPT_force__cpusubtype__ALL); - - // Pass along any -I options so we get proper .include search paths. - Args.AddAllArgs(CmdArgs, options::OPT_I_Group); - - // Determine the original source input. - const Action *SourceAction = &JA; - while (SourceAction->getKind() != Action::InputClass) { - assert(!SourceAction->getInputs().empty() && "unexpected root action!"); - SourceAction = SourceAction->getInputs()[0]; - } - - // Forward -g and handle debug info related flags, assuming we are dealing - // with an actual assembly file. - bool WantDebug = false; - unsigned DwarfVersion = 0; - Args.ClaimAllArgs(options::OPT_g_Group); - if (Arg *A = Args.getLastArg(options::OPT_g_Group)) { - WantDebug = !A->getOption().matches(options::OPT_g0) && - !A->getOption().matches(options::OPT_ggdb0); - if (WantDebug) - DwarfVersion = DwarfVersionNum(A->getSpelling()); - } - if (DwarfVersion == 0) - DwarfVersion = getToolChain().GetDefaultDwarfVersion(); - - codegenoptions::DebugInfoKind DebugInfoKind = codegenoptions::NoDebugInfo; - - if (SourceAction->getType() == types::TY_Asm || - SourceAction->getType() == types::TY_PP_Asm) { - // You might think that it would be ok to set DebugInfoKind outside of - // the guard for source type, however there is a test which asserts - // that some assembler invocation receives no -debug-info-kind, - // and it's not clear whether that test is just overly restrictive. - DebugInfoKind = (WantDebug ? codegenoptions::LimitedDebugInfo - : codegenoptions::NoDebugInfo); - // Add the -fdebug-compilation-dir flag if needed. - addDebugCompDirArg(Args, CmdArgs); - - // Set the AT_producer to the clang version when using the integrated - // assembler on assembly source files. - CmdArgs.push_back("-dwarf-debug-producer"); - CmdArgs.push_back(Args.MakeArgString(getClangFullVersion())); - - // And pass along -I options - Args.AddAllArgs(CmdArgs, options::OPT_I); - } - RenderDebugEnablingArgs(Args, CmdArgs, DebugInfoKind, DwarfVersion, - llvm::DebuggerKind::Default); - - // Handle -fPIC et al -- the relocation-model affects the assembler - // for some targets. - llvm::Reloc::Model RelocationModel; - unsigned PICLevel; - bool IsPIE; - std::tie(RelocationModel, PICLevel, IsPIE) = - ParsePICArgs(getToolChain(), Triple, Args); - - const char *RMName = RelocationModelName(RelocationModel); - if (RMName) { - CmdArgs.push_back("-mrelocation-model"); - CmdArgs.push_back(RMName); - } - - // Optionally embed the -cc1as level arguments into the debug info, for build - // analysis. 
-  if (getToolChain().UseDwarfDebugFlags()) {
-    ArgStringList OriginalArgs;
-    for (const auto &Arg : Args)
-      Arg->render(Args, OriginalArgs);
-
-    SmallString<256> Flags;
-    const char *Exec = getToolChain().getDriver().getClangProgramPath();
-    Flags += Exec;
-    for (const char *OriginalArg : OriginalArgs) {
-      SmallString<128> EscapedArg;
-      EscapeSpacesAndBackslashes(OriginalArg, EscapedArg);
-      Flags += " ";
-      Flags += EscapedArg;
-    }
-    CmdArgs.push_back("-dwarf-debug-flags");
-    CmdArgs.push_back(Args.MakeArgString(Flags));
-  }
-
-  // FIXME: Add -static support, once we have it.
-
-  // Add target specific flags.
-  switch (getToolChain().getArch()) {
-  default:
-    break;
-
-  case llvm::Triple::mips:
-  case llvm::Triple::mipsel:
-  case llvm::Triple::mips64:
-  case llvm::Triple::mips64el:
-    AddMIPSTargetArgs(Args, CmdArgs);
-    break;
-
-  case llvm::Triple::x86:
-  case llvm::Triple::x86_64:
-    AddX86TargetArgs(Args, CmdArgs);
-    break;
-  }
-
-  // Consume all the warning flags. Usually this would be handled more
-  // gracefully by -cc1 (warning about unknown warning flags, etc) but -cc1as
-  // doesn't handle that so rather than warning about unused flags that are
-  // actually used, we'll lie by omission instead.
-  // FIXME: Stop lying and consume only the appropriate driver flags
-  Args.ClaimAllArgs(options::OPT_W_Group);
-
-  CollectArgsForIntegratedAssembler(C, Args, CmdArgs,
-                                    getToolChain().getDriver());
-
-  Args.AddAllArgs(CmdArgs, options::OPT_mllvm);
-
-  assert(Output.isFilename() && "Unexpected lipo output.");
-  CmdArgs.push_back("-o");
-  CmdArgs.push_back(Output.getFilename());
-
-  assert(Input.isFilename() && "Invalid input.");
-  CmdArgs.push_back(Input.getFilename());
-
-  const char *Exec = getToolChain().getDriver().getClangProgramPath();
-  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
-
-  // Handle the debug info splitting at object creation time if we're
-  // creating an object.
-  // TODO: Currently only works on linux with newer objcopy.
-  if (Args.hasArg(options::OPT_gsplit_dwarf) &&
-      getToolChain().getTriple().isOSLinux())
-    SplitDebugInfo(getToolChain(), C, *this, JA, Args, Output,
-                   SplitDebugName(Args, Input));
-}
-
-void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
-                                  const InputInfo &Output,
-                                  const InputInfoList &Inputs,
-                                  const llvm::opt::ArgList &TCArgs,
-                                  const char *LinkingOutput) const {
-  // The version with only one output is expected to refer to a bundling job.
-  assert(isa<OffloadBundlingJobAction>(JA) && "Expecting bundling job!");
-
-  // The bundling command looks like this:
-  // clang-offload-bundler -type=bc
-  //   -targets=host-triple,openmp-triple1,openmp-triple2
-  //   -outputs=input_file
-  //   -inputs=unbundle_file_host,unbundle_file_tgt1,unbundle_file_tgt2"
-
-  ArgStringList CmdArgs;
-
-  // Get the type.
-  CmdArgs.push_back(TCArgs.MakeArgString(
-      Twine("-type=") + types::getTypeTempSuffix(Output.getType())));
-
-  assert(JA.getInputs().size() == Inputs.size() &&
-         "Not have inputs for all dependence actions??");
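Each dependent action contributes one offload-kind/triple pair, so for a host x86_64 compile with a single OpenMP NVPTX device the loop below would produce something like (triples illustrative):

  -targets=host-x86_64-unknown-linux-gnu,openmp-nvptx64-nvidia-cuda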
-  // Get the targets.
-  SmallString<128> Triples;
-  Triples += "-targets=";
-  for (unsigned I = 0; I < Inputs.size(); ++I) {
-    if (I)
-      Triples += ',';
-
-    Action::OffloadKind CurKind = Action::OFK_Host;
-    const ToolChain *CurTC = &getToolChain();
-    const Action *CurDep = JA.getInputs()[I];
-
-    if (const auto *OA = dyn_cast<OffloadAction>(CurDep)) {
-      OA->doOnEachDependence([&](Action *A, const ToolChain *TC, const char *) {
-        CurKind = A->getOffloadingDeviceKind();
-        CurTC = TC;
-      });
-    }
-    Triples += Action::GetOffloadKindName(CurKind);
-    Triples += '-';
-    Triples += CurTC->getTriple().normalize();
-  }
-  CmdArgs.push_back(TCArgs.MakeArgString(Triples));
-
-  // Get bundled file command.
-  CmdArgs.push_back(
-      TCArgs.MakeArgString(Twine("-outputs=") + Output.getFilename()));
-
-  // Get unbundled files command.
-  SmallString<128> UB;
-  UB += "-inputs=";
-  for (unsigned I = 0; I < Inputs.size(); ++I) {
-    if (I)
-      UB += ',';
-    UB += Inputs[I].getFilename();
-  }
-  CmdArgs.push_back(TCArgs.MakeArgString(UB));
-
-  // All the inputs are encoded as commands.
-  C.addCommand(llvm::make_unique<Command>(
-      JA, *this,
-      TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())),
-      CmdArgs, None));
-}
-
-void OffloadBundler::ConstructJobMultipleOutputs(
-    Compilation &C, const JobAction &JA, const InputInfoList &Outputs,
-    const InputInfoList &Inputs, const llvm::opt::ArgList &TCArgs,
-    const char *LinkingOutput) const {
-  // The version with multiple outputs is expected to refer to an unbundling
-  // job.
-  auto &UA = cast<OffloadUnbundlingJobAction>(JA);
-
-  // The unbundling command looks like this:
-  // clang-offload-bundler -type=bc
-  //   -targets=host-triple,openmp-triple1,openmp-triple2
-  //   -inputs=input_file
-  //   -outputs=unbundle_file_host,unbundle_file_tgt1,unbundle_file_tgt2"
-  //   -unbundle
-
-  ArgStringList CmdArgs;
-
-  assert(Inputs.size() == 1 && "Expecting to unbundle a single file!");
-  InputInfo Input = Inputs.front();
-
-  // Get the type.
-  CmdArgs.push_back(TCArgs.MakeArgString(
-      Twine("-type=") + types::getTypeTempSuffix(Input.getType())));
-
-  // Get the targets.
-  SmallString<128> Triples;
-  Triples += "-targets=";
-  auto DepInfo = UA.getDependentActionsInfo();
-  for (unsigned I = 0; I < DepInfo.size(); ++I) {
-    if (I)
-      Triples += ',';
-
-    auto &Dep = DepInfo[I];
-    Triples += Action::GetOffloadKindName(Dep.DependentOffloadKind);
-    Triples += '-';
-    Triples += Dep.DependentToolChain->getTriple().normalize();
-  }
-
-  CmdArgs.push_back(TCArgs.MakeArgString(Triples));
-
-  // Get bundled file command.
-  CmdArgs.push_back(
-      TCArgs.MakeArgString(Twine("-inputs=") + Input.getFilename()));
-
-  // Get unbundled files command.
-  SmallString<128> UB;
-  UB += "-outputs=";
-  for (unsigned I = 0; I < Outputs.size(); ++I) {
-    if (I)
-      UB += ',';
-    UB += Outputs[I].getFilename();
-  }
-  CmdArgs.push_back(TCArgs.MakeArgString(UB));
-  CmdArgs.push_back("-unbundle");
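With -unbundle appended, the finished invocation mirrors the bundling case with the roles of -inputs and -outputs swapped, e.g. (type and file names illustrative):

  clang-offload-bundler -type=o \
      -targets=host-x86_64-unknown-linux-gnu,openmp-nvptx64-nvidia-cuda \
      -inputs=a.o -outputs=a.host.o,a.nvptx.o -unbundle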
-  // All the inputs are encoded as commands.
-  C.addCommand(llvm::make_unique<Command>(
-      JA, *this,
-      TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())),
-      CmdArgs, None));
-}
-
-void GnuTool::anchor() {}
-
-void gcc::Common::ConstructJob(Compilation &C, const JobAction &JA,
-                               const InputInfo &Output,
-                               const InputInfoList &Inputs, const ArgList &Args,
-                               const char *LinkingOutput) const {
-  const Driver &D = getToolChain().getDriver();
-  ArgStringList CmdArgs;
-
-  for (const auto &A : Args) {
-    if (forwardToGCC(A->getOption())) {
-      // It is unfortunate that we have to claim here, as this means
-      // we will basically never report anything interesting for
-      // platforms using a generic gcc, even if we are just using gcc
-      // to get to the assembler.
-      A->claim();
-
-      // Don't forward any -g arguments to assembly steps.
-      if (isa<AssembleJobAction>(JA) &&
-          A->getOption().matches(options::OPT_g_Group))
-        continue;
-
-      // Don't forward any -W arguments to assembly and link steps.
-      if ((isa<AssembleJobAction>(JA) || isa<LinkJobAction>(JA)) &&
-          A->getOption().matches(options::OPT_W_Group))
-        continue;
-
-      A->render(Args, CmdArgs);
-    }
-  }
-
-  RenderExtraToolArgs(JA, CmdArgs);
-
-  // If using a driver driver, force the arch.
-  if (getToolChain().getTriple().isOSDarwin()) {
-    CmdArgs.push_back("-arch");
-    CmdArgs.push_back(
-        Args.MakeArgString(getToolChain().getDefaultUniversalArchName()));
-  }
-
-  // Try to force gcc to match the tool chain we want, if we recognize
-  // the arch.
-  //
-  // FIXME: The triple class should directly provide the information we want
-  // here.
-  switch (getToolChain().getArch()) {
-  default:
-    break;
-  case llvm::Triple::x86:
-  case llvm::Triple::ppc:
-    CmdArgs.push_back("-m32");
-    break;
-  case llvm::Triple::x86_64:
-  case llvm::Triple::ppc64:
-  case llvm::Triple::ppc64le:
-    CmdArgs.push_back("-m64");
-    break;
-  case llvm::Triple::sparcel:
-    CmdArgs.push_back("-EL");
-    break;
-  }
-
-  if (Output.isFilename()) {
-    CmdArgs.push_back("-o");
-    CmdArgs.push_back(Output.getFilename());
-  } else {
-    assert(Output.isNothing() && "Unexpected output");
-    CmdArgs.push_back("-fsyntax-only");
-  }
-
-  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);
-
-  // Only pass -x if gcc will understand it; otherwise hope gcc
-  // understands the suffix correctly. The main use case this would go
-  // wrong in is for linker inputs if they happened to have an odd
-  // suffix; really the only way to get this to happen is a command
-  // like '-x foobar a.c' which will treat a.c like a linker input.
-  //
-  // FIXME: For the linker case specifically, can we safely convert
-  // inputs into '-Wl,' options?
-  for (const auto &II : Inputs) {
-    // Don't try to pass LLVM or AST inputs to a generic gcc.
-    if (types::isLLVMIR(II.getType()))
-      D.Diag(diag::err_drv_no_linker_llvm_support)
-          << getToolChain().getTripleString();
-    else if (II.getType() == types::TY_AST)
-      D.Diag(diag::err_drv_no_ast_support) << getToolChain().getTripleString();
-    else if (II.getType() == types::TY_ModuleFile)
-      D.Diag(diag::err_drv_no_module_support)
-          << getToolChain().getTripleString();
-
-    if (types::canTypeBeUserSpecified(II.getType())) {
-      CmdArgs.push_back("-x");
-      CmdArgs.push_back(types::getTypeName(II.getType()));
-    }
-
-    if (II.isFilename())
-      CmdArgs.push_back(II.getFilename());
-    else {
-      const Arg &A = II.getInputArg();
-
-      // Reverse translate some rewritten options.
-      if (A.getOption().matches(options::OPT_Z_reserved_lib_stdcxx)) {
-        CmdArgs.push_back("-lstdc++");
-        continue;
-      }
-
-      // Don't render as input, we need gcc to do the translations.
- A.render(Args, CmdArgs); - } - } - - const std::string &customGCCName = D.getCCCGenericGCCName(); - const char *GCCName; - if (!customGCCName.empty()) - GCCName = customGCCName.c_str(); - else if (D.CCCIsCXX()) { - GCCName = "g++"; - } else - GCCName = "gcc"; - - const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath(GCCName)); - C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); -} - -void gcc::Preprocessor::RenderExtraToolArgs(const JobAction &JA, - ArgStringList &CmdArgs) const { - CmdArgs.push_back("-E"); -} - -void gcc::Compiler::RenderExtraToolArgs(const JobAction &JA, - ArgStringList &CmdArgs) const { - const Driver &D = getToolChain().getDriver(); - - switch (JA.getType()) { - // If -flto, etc. are present then make sure not to force assembly output. - case types::TY_LLVM_IR: - case types::TY_LTO_IR: - case types::TY_LLVM_BC: - case types::TY_LTO_BC: - CmdArgs.push_back("-c"); - break; - // We assume we've got an "integrated" assembler in that gcc will produce an - // object file itself. - case types::TY_Object: - CmdArgs.push_back("-c"); - break; - case types::TY_PP_Asm: - CmdArgs.push_back("-S"); - break; - case types::TY_Nothing: - CmdArgs.push_back("-fsyntax-only"); - break; - default: - D.Diag(diag::err_drv_invalid_gcc_output_type) << getTypeName(JA.getType()); - } -} - -void gcc::Linker::RenderExtraToolArgs(const JobAction &JA, - ArgStringList &CmdArgs) const { - // The types are (hopefully) good enough. -} - -// Hexagon tools start. -void hexagon::Assembler::RenderExtraToolArgs(const JobAction &JA, - ArgStringList &CmdArgs) const { -} - -void hexagon::Assembler::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - claimNoWarnArgs(Args); - - auto &HTC = static_cast(getToolChain()); - const Driver &D = HTC.getDriver(); - ArgStringList CmdArgs; - - std::string MArchString = "-march=hexagon"; - CmdArgs.push_back(Args.MakeArgString(MArchString)); - - RenderExtraToolArgs(JA, CmdArgs); - - std::string AsName = "hexagon-llvm-mc"; - std::string MCpuString = "-mcpu=hexagon" + - toolchains::HexagonToolChain::GetTargetCPUVersion(Args).str(); - CmdArgs.push_back("-filetype=obj"); - CmdArgs.push_back(Args.MakeArgString(MCpuString)); - - if (Output.isFilename()) { - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - } else { - assert(Output.isNothing() && "Unexpected output"); - CmdArgs.push_back("-fsyntax-only"); - } - - if (auto G = toolchains::HexagonToolChain::getSmallDataThreshold(Args)) { - std::string N = llvm::utostr(G.getValue()); - CmdArgs.push_back(Args.MakeArgString(std::string("-gpsize=") + N)); - } - - Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); - - // Only pass -x if gcc will understand it; otherwise hope gcc - // understands the suffix correctly. The main use case this would go - // wrong in is for linker inputs if they happened to have an odd - // suffix; really the only way to get this to happen is a command - // like '-x foobar a.c' which will treat a.c like a linker input. - // - // FIXME: For the linker case specifically, can we safely convert - // inputs into '-Wl,' options? - for (const auto &II : Inputs) { - // Don't try to pass LLVM or AST inputs to a generic gcc. 
- if (types::isLLVMIR(II.getType())) - D.Diag(clang::diag::err_drv_no_linker_llvm_support) - << HTC.getTripleString(); - else if (II.getType() == types::TY_AST) - D.Diag(clang::diag::err_drv_no_ast_support) - << HTC.getTripleString(); - else if (II.getType() == types::TY_ModuleFile) - D.Diag(diag::err_drv_no_module_support) - << HTC.getTripleString(); - - if (II.isFilename()) - CmdArgs.push_back(II.getFilename()); - else - // Don't render as input, we need gcc to do the translations. - // FIXME: What is this? - II.getInputArg().render(Args, CmdArgs); - } - - auto *Exec = Args.MakeArgString(HTC.GetProgramPath(AsName.c_str())); - C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); -} - -void hexagon::Linker::RenderExtraToolArgs(const JobAction &JA, - ArgStringList &CmdArgs) const { -} - -static void -constructHexagonLinkArgs(Compilation &C, const JobAction &JA, - const toolchains::HexagonToolChain &HTC, - const InputInfo &Output, const InputInfoList &Inputs, - const ArgList &Args, ArgStringList &CmdArgs, - const char *LinkingOutput) { - - const Driver &D = HTC.getDriver(); - - //---------------------------------------------------------------------------- - // - //---------------------------------------------------------------------------- - bool IsStatic = Args.hasArg(options::OPT_static); - bool IsShared = Args.hasArg(options::OPT_shared); - bool IsPIE = Args.hasArg(options::OPT_pie); - bool IncStdLib = !Args.hasArg(options::OPT_nostdlib); - bool IncStartFiles = !Args.hasArg(options::OPT_nostartfiles); - bool IncDefLibs = !Args.hasArg(options::OPT_nodefaultlibs); - bool UseG0 = false; - bool UseShared = IsShared && !IsStatic; - - //---------------------------------------------------------------------------- - // Silence warnings for various options - //---------------------------------------------------------------------------- - Args.ClaimAllArgs(options::OPT_g_Group); - Args.ClaimAllArgs(options::OPT_emit_llvm); - Args.ClaimAllArgs(options::OPT_w); // Other warning options are already - // handled somewhere else. - Args.ClaimAllArgs(options::OPT_static_libgcc); - - //---------------------------------------------------------------------------- - // - //---------------------------------------------------------------------------- - if (Args.hasArg(options::OPT_s)) - CmdArgs.push_back("-s"); - - if (Args.hasArg(options::OPT_r)) - CmdArgs.push_back("-r"); - - for (const auto &Opt : HTC.ExtraOpts) - CmdArgs.push_back(Opt.c_str()); - - CmdArgs.push_back("-march=hexagon"); - std::string CpuVer = - toolchains::HexagonToolChain::GetTargetCPUVersion(Args).str(); - std::string MCpuString = "-mcpu=hexagon" + CpuVer; - CmdArgs.push_back(Args.MakeArgString(MCpuString)); - - if (IsShared) { - CmdArgs.push_back("-shared"); - // The following should be the default, but doing as hexagon-gcc does. 
- CmdArgs.push_back("-call_shared"); - } - - if (IsStatic) - CmdArgs.push_back("-static"); - - if (IsPIE && !IsShared) - CmdArgs.push_back("-pie"); - - if (auto G = toolchains::HexagonToolChain::getSmallDataThreshold(Args)) { - std::string N = llvm::utostr(G.getValue()); - CmdArgs.push_back(Args.MakeArgString(std::string("-G") + N)); - UseG0 = G.getValue() == 0; - } - - //---------------------------------------------------------------------------- - // - //---------------------------------------------------------------------------- - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - - //---------------------------------------------------------------------------- - // moslib - //---------------------------------------------------------------------------- - std::vector OsLibs; - bool HasStandalone = false; - - for (const Arg *A : Args.filtered(options::OPT_moslib_EQ)) { - A->claim(); - OsLibs.emplace_back(A->getValue()); - HasStandalone = HasStandalone || (OsLibs.back() == "standalone"); - } - if (OsLibs.empty()) { - OsLibs.push_back("standalone"); - HasStandalone = true; - } - - //---------------------------------------------------------------------------- - // Start Files - //---------------------------------------------------------------------------- - const std::string MCpuSuffix = "/" + CpuVer; - const std::string MCpuG0Suffix = MCpuSuffix + "/G0"; - const std::string RootDir = - HTC.getHexagonTargetDir(D.InstalledDir, D.PrefixDirs) + "/"; - const std::string StartSubDir = - "hexagon/lib" + (UseG0 ? MCpuG0Suffix : MCpuSuffix); - - auto Find = [&HTC] (const std::string &RootDir, const std::string &SubDir, - const char *Name) -> std::string { - std::string RelName = SubDir + Name; - std::string P = HTC.GetFilePath(RelName.c_str()); - if (llvm::sys::fs::exists(P)) - return P; - return RootDir + RelName; - }; - - if (IncStdLib && IncStartFiles) { - if (!IsShared) { - if (HasStandalone) { - std::string Crt0SA = Find(RootDir, StartSubDir, "/crt0_standalone.o"); - CmdArgs.push_back(Args.MakeArgString(Crt0SA)); - } - std::string Crt0 = Find(RootDir, StartSubDir, "/crt0.o"); - CmdArgs.push_back(Args.MakeArgString(Crt0)); - } - std::string Init = UseShared - ? 
Find(RootDir, StartSubDir + "/pic", "/initS.o") - : Find(RootDir, StartSubDir, "/init.o"); - CmdArgs.push_back(Args.MakeArgString(Init)); - } - - //---------------------------------------------------------------------------- - // Library Search Paths - //---------------------------------------------------------------------------- - const ToolChain::path_list &LibPaths = HTC.getFilePaths(); - for (const auto &LibPath : LibPaths) - CmdArgs.push_back(Args.MakeArgString(StringRef("-L") + LibPath)); - - //---------------------------------------------------------------------------- - // - //---------------------------------------------------------------------------- - Args.AddAllArgs(CmdArgs, - {options::OPT_T_Group, options::OPT_e, options::OPT_s, - options::OPT_t, options::OPT_u_Group}); - - AddLinkerInputs(HTC, Inputs, Args, CmdArgs, JA); - - //---------------------------------------------------------------------------- - // Libraries - //---------------------------------------------------------------------------- - if (IncStdLib && IncDefLibs) { - if (D.CCCIsCXX()) { - HTC.AddCXXStdlibLibArgs(Args, CmdArgs); - CmdArgs.push_back("-lm"); - } - - CmdArgs.push_back("--start-group"); - - if (!IsShared) { - for (const std::string &Lib : OsLibs) - CmdArgs.push_back(Args.MakeArgString("-l" + Lib)); - CmdArgs.push_back("-lc"); - } - CmdArgs.push_back("-lgcc"); - - CmdArgs.push_back("--end-group"); - } - - //---------------------------------------------------------------------------- - // End files - //---------------------------------------------------------------------------- - if (IncStdLib && IncStartFiles) { - std::string Fini = UseShared - ? Find(RootDir, StartSubDir + "/pic", "/finiS.o") - : Find(RootDir, StartSubDir, "/fini.o"); - CmdArgs.push_back(Args.MakeArgString(Fini)); - } -} - -void hexagon::Linker::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - auto &HTC = static_cast(getToolChain()); - - ArgStringList CmdArgs; - constructHexagonLinkArgs(C, JA, HTC, Output, Inputs, Args, CmdArgs, - LinkingOutput); - - std::string Linker = HTC.GetProgramPath("hexagon-link"); - C.addCommand(llvm::make_unique(JA, *this, Args.MakeArgString(Linker), - CmdArgs, Inputs)); -} -// Hexagon tools end. - -void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - - std::string Linker = getToolChain().GetProgramPath(getShortName()); - ArgStringList CmdArgs; - AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); - CmdArgs.push_back("-shared"); - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - C.addCommand(llvm::make_unique(JA, *this, Args.MakeArgString(Linker), - CmdArgs, Inputs)); -} -// AMDGPU tools end. 
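The Hexagon and AMDGPU linkers above follow the pattern that every tool in this file implements: derive an ArgStringList from the driver arguments, then hand the tool path plus arguments to the Compilation as a Command. A schematic reduction of that recurring shape (MyLinker is a placeholder, not a real tool):

void MyLinker::ConstructJob(Compilation &C, const JobAction &JA,
                            const InputInfo &Output,
                            const InputInfoList &Inputs, const ArgList &Args,
                            const char *LinkingOutput) const {
  ArgStringList CmdArgs;
  // Forward object files and -l/-L style options to the external tool.
  AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
  CmdArgs.push_back("-o");
  CmdArgs.push_back(Output.getFilename());
  // Every job ultimately becomes a Command owned by the Compilation.
  std::string Linker = getToolChain().GetProgramPath(getShortName());
  C.addCommand(llvm::make_unique<Command>(
      JA, *this, Args.MakeArgString(Linker), CmdArgs, Inputs));
}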
- -wasm::Linker::Linker(const ToolChain &TC) - : GnuTool("wasm::Linker", "lld", TC) {} - -bool wasm::Linker::isLinkJob() const { - return true; -} - -bool wasm::Linker::hasIntegratedCPP() const { - return false; -} - -void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - - const ToolChain &ToolChain = getToolChain(); - const Driver &D = ToolChain.getDriver(); - const char *Linker = Args.MakeArgString(ToolChain.GetLinkerPath()); - ArgStringList CmdArgs; - CmdArgs.push_back("-flavor"); - CmdArgs.push_back("ld"); - - // Enable garbage collection of unused input sections by default, since code - // size is of particular importance. This is significantly facilitated by - // the enabling of -ffunction-sections and -fdata-sections in - // Clang::ConstructJob. - if (areOptimizationsEnabled(Args)) - CmdArgs.push_back("--gc-sections"); - - if (Args.hasArg(options::OPT_rdynamic)) - CmdArgs.push_back("-export-dynamic"); - if (Args.hasArg(options::OPT_s)) - CmdArgs.push_back("--strip-all"); - if (Args.hasArg(options::OPT_shared)) - CmdArgs.push_back("-shared"); - if (Args.hasArg(options::OPT_static)) - CmdArgs.push_back("-Bstatic"); - - Args.AddAllArgs(CmdArgs, options::OPT_L); - ToolChain.AddFilePathLibArgs(Args, CmdArgs); - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { - if (Args.hasArg(options::OPT_shared)) - CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("rcrt1.o"))); - else if (Args.hasArg(options::OPT_pie)) - CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("Scrt1.o"))); - else - CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crt1.o"))); - - CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crti.o"))); - } - - AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { - if (D.CCCIsCXX()) - ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs); - - if (Args.hasArg(options::OPT_pthread)) - CmdArgs.push_back("-lpthread"); - - CmdArgs.push_back("-lc"); - CmdArgs.push_back("-lcompiler_rt"); - } - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) - CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtn.o"))); - - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - - C.addCommand(llvm::make_unique(JA, *this, Linker, CmdArgs, Inputs)); -} - -const std::string arm::getARMArch(StringRef Arch, const llvm::Triple &Triple) { - std::string MArch; - if (!Arch.empty()) - MArch = Arch; - else - MArch = Triple.getArchName(); - MArch = StringRef(MArch).split("+").first.lower(); - - // Handle -march=native. - if (MArch == "native") { - std::string CPU = llvm::sys::getHostCPUName(); - if (CPU != "generic") { - // Translate the native cpu into the architecture suffix for that CPU. - StringRef Suffix = arm::getLLVMArchSuffixForARM(CPU, MArch, Triple); - // If there is no valid architecture suffix for this CPU we don't know how - // to handle it, so return no architecture. - if (Suffix.empty()) - MArch = ""; - else - MArch = std::string("arm") + Suffix.str(); - } - } - - return MArch; -} - -/// Get the (LLVM) name of the minimum ARM CPU for the arch we are targeting. 
-StringRef arm::getARMCPUForMArch(StringRef Arch, const llvm::Triple &Triple) {
-  std::string MArch = getARMArch(Arch, Triple);
-  // getARMCPUForArch defaults to the triple if MArch is empty, but empty MArch
-  // here means an -march=native that we can't handle, so instead return no CPU.
-  if (MArch.empty())
-    return StringRef();
-
-  // We need to return an empty string here on invalid MArch values as the
-  // various places that call this function can't cope with a null result.
-  return Triple.getARMCPUForArch(MArch);
-}
-
-/// getARMTargetCPU - Get the (LLVM) name of the ARM cpu we are targeting.
-std::string arm::getARMTargetCPU(StringRef CPU, StringRef Arch,
-                                 const llvm::Triple &Triple) {
-  // FIXME: Warn on inconsistent use of -mcpu and -march.
-  // If we have -mcpu=, use that.
-  if (!CPU.empty()) {
-    std::string MCPU = StringRef(CPU).split("+").first.lower();
-    // Handle -mcpu=native.
-    if (MCPU == "native")
-      return llvm::sys::getHostCPUName();
-    else
-      return MCPU;
-  }
-
-  return getARMCPUForMArch(Arch, Triple);
-}
-
-/// getLLVMArchSuffixForARM - Get the LLVM arch name to use for a particular
-/// CPU (or Arch, if CPU is generic).
-// FIXME: This is redundant with -mcpu, why does LLVM use this.
-StringRef arm::getLLVMArchSuffixForARM(StringRef CPU, StringRef Arch,
-                                       const llvm::Triple &Triple) {
-  unsigned ArchKind;
-  if (CPU == "generic") {
-    std::string ARMArch = tools::arm::getARMArch(Arch, Triple);
-    ArchKind = llvm::ARM::parseArch(ARMArch);
-    if (ArchKind == llvm::ARM::AK_INVALID)
-      // In case of generic Arch, i.e. "arm",
-      // extract arch from default cpu of the Triple
-      ArchKind = llvm::ARM::parseCPUArch(Triple.getARMCPUForArch(ARMArch));
-  } else {
-    // FIXME: horrible hack to get around the fact that Cortex-A7 is only an
-    // armv7k triple if it's actually been specified via "-arch armv7k".
-    ArchKind = (Arch == "armv7k" || Arch == "thumbv7k")
-                   ? (unsigned)llvm::ARM::AK_ARMV7K
-                   : llvm::ARM::parseCPUArch(CPU);
-  }
-  if (ArchKind == llvm::ARM::AK_INVALID)
-    return "";
-  return llvm::ARM::getSubArch(ArchKind);
-}
-
-void arm::appendEBLinkFlags(const ArgList &Args, ArgStringList &CmdArgs,
-                            const llvm::Triple &Triple) {
-  if (Args.hasArg(options::OPT_r))
-    return;
-
-  // ARMv7 (and later) and ARMv6-M do not support BE-32, so instruct the linker
-  // to generate BE-8 executables.
-  if (getARMSubArchVersionNumber(Triple) >= 7 || isARMMProfile(Triple))
-    CmdArgs.push_back("--be8");
-}
-
-mips::NanEncoding mips::getSupportedNanEncoding(StringRef &CPU) {
-  // Strictly speaking, mips32r2 and mips64r2 are NanLegacy-only since Nan2008
-  // was first introduced in Release 3. However, other compilers have
-  // traditionally allowed it for Release 2 so we should do the same.
-  return (NanEncoding)llvm::StringSwitch<int>(CPU)
-      .Case("mips1", NanLegacy)
-      .Case("mips2", NanLegacy)
-      .Case("mips3", NanLegacy)
-      .Case("mips4", NanLegacy)
-      .Case("mips5", NanLegacy)
-      .Case("mips32", NanLegacy)
-      .Case("mips32r2", NanLegacy | Nan2008)
-      .Case("mips32r3", NanLegacy | Nan2008)
-      .Case("mips32r5", NanLegacy | Nan2008)
-      .Case("mips32r6", Nan2008)
-      .Case("mips64", NanLegacy)
-      .Case("mips64r2", NanLegacy | Nan2008)
-      .Case("mips64r3", NanLegacy | Nan2008)
-      .Case("mips64r5", NanLegacy | Nan2008)
-      .Case("mips64r6", Nan2008)
-      .Default(NanLegacy);
-}
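Callers treat the result as a bitmask, which is why the Release 2 and later revisions report both encodings. An illustrative check:

  StringRef CPU = "mips32r2";
  // Nan2008 is set for r2 and newer revisions, so -mnan=2008 is accepted.
  if (mips::getSupportedNanEncoding(CPU) & mips::Nan2008) {
    // ... permit -mnan=2008 for this CPU ...
  }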
-bool mips::hasCompactBranches(StringRef &CPU) {
-  // mips32r6 and mips64r6 have compact branches.
-  return llvm::StringSwitch<bool>(CPU)
-      .Case("mips32r6", true)
-      .Case("mips64r6", true)
-      .Default(false);
-}
-
-bool mips::hasMipsAbiArg(const ArgList &Args, const char *Value) {
-  Arg *A = Args.getLastArg(options::OPT_mabi_EQ);
-  return A && (A->getValue() == StringRef(Value));
-}
-
-bool mips::isUCLibc(const ArgList &Args) {
-  Arg *A = Args.getLastArg(options::OPT_m_libc_Group);
-  return A && A->getOption().matches(options::OPT_muclibc);
-}
-
-bool mips::isNaN2008(const ArgList &Args, const llvm::Triple &Triple) {
-  if (Arg *NaNArg = Args.getLastArg(options::OPT_mnan_EQ))
-    return llvm::StringSwitch<bool>(NaNArg->getValue())
-        .Case("2008", true)
-        .Case("legacy", false)
-        .Default(false);
-
-  // NaN2008 is the default for MIPS32r6/MIPS64r6.
-  return llvm::StringSwitch<bool>(getCPUName(Args, Triple))
-      .Cases("mips32r6", "mips64r6", true)
-      .Default(false);
-
-  return false;
-}
-
-bool mips::isFP64ADefault(const llvm::Triple &Triple, StringRef CPUName) {
-  if (!Triple.isAndroid())
-    return false;
-
-  // Android MIPS32R6 defaults to FP64A.
-  return llvm::StringSwitch<bool>(CPUName)
-      .Case("mips32r6", true)
-      .Default(false);
-}
-
-bool mips::isFPXXDefault(const llvm::Triple &Triple, StringRef CPUName,
-                         StringRef ABIName, mips::FloatABI FloatABI) {
-  if (Triple.getVendor() != llvm::Triple::ImaginationTechnologies &&
-      Triple.getVendor() != llvm::Triple::MipsTechnologies &&
-      !Triple.isAndroid())
-    return false;
-
-  if (ABIName != "32")
-    return false;
-
-  // FPXX shouldn't be used if either -msoft-float or -mfloat-abi=soft is
-  // present.
-  if (FloatABI == mips::FloatABI::Soft)
-    return false;
-
-  return llvm::StringSwitch<bool>(CPUName)
-      .Cases("mips2", "mips3", "mips4", "mips5", true)
-      .Cases("mips32", "mips32r2", "mips32r3", "mips32r5", true)
-      .Cases("mips64", "mips64r2", "mips64r3", "mips64r5", true)
-      .Default(false);
-}
-
-bool mips::shouldUseFPXX(const ArgList &Args, const llvm::Triple &Triple,
-                         StringRef CPUName, StringRef ABIName,
-                         mips::FloatABI FloatABI) {
-  bool UseFPXX = isFPXXDefault(Triple, CPUName, ABIName, FloatABI);
-
-  // FPXX shouldn't be used if -msingle-float is present.
-  if (Arg *A = Args.getLastArg(options::OPT_msingle_float,
-                               options::OPT_mdouble_float))
-    if (A->getOption().matches(options::OPT_msingle_float))
-      UseFPXX = false;
-
-  return UseFPXX;
-}
-
-llvm::Triple::ArchType darwin::getArchTypeForMachOArchName(StringRef Str) {
-  // See arch(3) and llvm-gcc's driver-driver.c. We don't implement support for
-  // archs which Darwin doesn't use.
-
-  // The matching this routine does is fairly pointless, since it is neither the
-  // complete architecture list, nor a reasonable subset. The problem is that
-  // historically the driver driver accepts this and also ties its -march=
-  // handling to the architecture name, so we need to be careful before removing
-  // support for it.
-
-  // This code must be kept in sync with Clang's Darwin specific argument
-  // translation.
-
-  return llvm::StringSwitch<llvm::Triple::ArchType>(Str)
-      .Cases("ppc", "ppc601", "ppc603", "ppc604", "ppc604e", llvm::Triple::ppc)
-      .Cases("ppc750", "ppc7400", "ppc7450", "ppc970", llvm::Triple::ppc)
-      .Case("ppc64", llvm::Triple::ppc64)
-      .Cases("i386", "i486", "i486SX", "i586", "i686", llvm::Triple::x86)
-      .Cases("pentium", "pentpro", "pentIIm3", "pentIIm5", "pentium4",
-             llvm::Triple::x86)
-      .Cases("x86_64", "x86_64h", llvm::Triple::x86_64)
-      // This is derived from the driver driver.
- .Cases("arm", "armv4t", "armv5", "armv6", "armv6m", llvm::Triple::arm) - .Cases("armv7", "armv7em", "armv7k", "armv7m", llvm::Triple::arm) - .Cases("armv7s", "xscale", llvm::Triple::arm) - .Case("arm64", llvm::Triple::aarch64) - .Case("r600", llvm::Triple::r600) - .Case("amdgcn", llvm::Triple::amdgcn) - .Case("nvptx", llvm::Triple::nvptx) - .Case("nvptx64", llvm::Triple::nvptx64) - .Case("amdil", llvm::Triple::amdil) - .Case("spir", llvm::Triple::spir) - .Default(llvm::Triple::UnknownArch); -} - -void darwin::setTripleTypeForMachOArchName(llvm::Triple &T, StringRef Str) { - const llvm::Triple::ArchType Arch = getArchTypeForMachOArchName(Str); - unsigned ArchKind = llvm::ARM::parseArch(Str); - T.setArch(Arch); - - if (Str == "x86_64h") - T.setArchName(Str); - else if (ArchKind == llvm::ARM::AK_ARMV6M || - ArchKind == llvm::ARM::AK_ARMV7M || - ArchKind == llvm::ARM::AK_ARMV7EM) { - T.setOS(llvm::Triple::UnknownOS); - T.setObjectFormat(llvm::Triple::MachO); - } -} - -const char *Clang::getBaseInputName(const ArgList &Args, - const InputInfo &Input) { - return Args.MakeArgString(llvm::sys::path::filename(Input.getBaseInput())); -} - -const char *Clang::getBaseInputStem(const ArgList &Args, - const InputInfoList &Inputs) { - const char *Str = getBaseInputName(Args, Inputs[0]); - - if (const char *End = strrchr(Str, '.')) - return Args.MakeArgString(std::string(Str, End)); - - return Str; -} - -const char *Clang::getDependencyFileName(const ArgList &Args, - const InputInfoList &Inputs) { - // FIXME: Think about this more. - std::string Res; - - if (Arg *OutputOpt = Args.getLastArg(options::OPT_o)) { - std::string Str(OutputOpt->getValue()); - Res = Str.substr(0, Str.rfind('.')); - } else { - Res = getBaseInputStem(Args, Inputs); - } - return Args.MakeArgString(Res + ".d"); -} - -void cloudabi::Linker::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - const ToolChain &ToolChain = getToolChain(); - const Driver &D = ToolChain.getDriver(); - ArgStringList CmdArgs; - - // Silence warning for "clang -g foo.o -o foo" - Args.ClaimAllArgs(options::OPT_g_Group); - // and "clang -emit-llvm foo.o -o foo" - Args.ClaimAllArgs(options::OPT_emit_llvm); - // and for "clang -w foo.o -o foo". Other warning options are already - // handled somewhere else. - Args.ClaimAllArgs(options::OPT_w); - - if (!D.SysRoot.empty()) - CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot)); - - // CloudABI only supports static linkage. - CmdArgs.push_back("-Bstatic"); - CmdArgs.push_back("--no-dynamic-linker"); - - // Provide PIE linker flags in case PIE is default for the architecture. 
- if (ToolChain.isPIEDefault()) { - CmdArgs.push_back("-pie"); - CmdArgs.push_back("-zrelro"); - } - - CmdArgs.push_back("--eh-frame-hdr"); - CmdArgs.push_back("--gc-sections"); - - if (Output.isFilename()) { - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - } else { - assert(Output.isNothing() && "Invalid output."); - } - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { - CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crt0.o"))); - CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtbegin.o"))); - } - - Args.AddAllArgs(CmdArgs, options::OPT_L); - ToolChain.AddFilePathLibArgs(Args, CmdArgs); - Args.AddAllArgs(CmdArgs, - {options::OPT_T_Group, options::OPT_e, options::OPT_s, - options::OPT_t, options::OPT_Z_Flag, options::OPT_r}); - - if (D.isUsingLTO()) - AddGoldPlugin(ToolChain, Args, CmdArgs, D.getLTOMode() == LTOK_Thin, D); - - AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { - if (D.CCCIsCXX()) - ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs); - CmdArgs.push_back("-lc"); - CmdArgs.push_back("-lcompiler_rt"); - } - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) - CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtend.o"))); - - const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath()); - C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); -} - -void darwin::Assembler::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - ArgStringList CmdArgs; - - assert(Inputs.size() == 1 && "Unexpected number of inputs."); - const InputInfo &Input = Inputs[0]; - - // Determine the original source input. - const Action *SourceAction = &JA; - while (SourceAction->getKind() != Action::InputClass) { - assert(!SourceAction->getInputs().empty() && "unexpected root action!"); - SourceAction = SourceAction->getInputs()[0]; - } - - // If -fno-integrated-as is used add -Q to the darwin assember driver to make - // sure it runs its system assembler not clang's integrated assembler. - // Applicable to darwin11+ and Xcode 4+. darwin<10 lacked integrated-as. - // FIXME: at run-time detect assembler capabilities or rely on version - // information forwarded by -target-assembler-version. - if (Args.hasArg(options::OPT_fno_integrated_as)) { - const llvm::Triple &T(getToolChain().getTriple()); - if (!(T.isMacOSX() && T.isMacOSXVersionLT(10, 7))) - CmdArgs.push_back("-Q"); - } - - // Forward -g, assuming we are dealing with an actual assembly file. - if (SourceAction->getType() == types::TY_Asm || - SourceAction->getType() == types::TY_PP_Asm) { - if (Args.hasArg(options::OPT_gstabs)) - CmdArgs.push_back("--gstabs"); - else if (Args.hasArg(options::OPT_g_Group)) - CmdArgs.push_back("-g"); - } - - // Derived from asm spec. - AddMachOArch(Args, CmdArgs); - - // Use -force_cpusubtype_ALL on x86 by default. 
- if (getToolChain().getArch() == llvm::Triple::x86 || - getToolChain().getArch() == llvm::Triple::x86_64 || - Args.hasArg(options::OPT_force__cpusubtype__ALL)) - CmdArgs.push_back("-force_cpusubtype_ALL"); - - if (getToolChain().getArch() != llvm::Triple::x86_64 && - (((Args.hasArg(options::OPT_mkernel) || - Args.hasArg(options::OPT_fapple_kext)) && - getMachOToolChain().isKernelStatic()) || - Args.hasArg(options::OPT_static))) - CmdArgs.push_back("-static"); - - Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); - - assert(Output.isFilename() && "Unexpected lipo output."); - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - - assert(Input.isFilename() && "Invalid input."); - CmdArgs.push_back(Input.getFilename()); - - // asm_final spec is empty. - - const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); - C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); -} - -void darwin::MachOTool::anchor() {} - -void darwin::MachOTool::AddMachOArch(const ArgList &Args, - ArgStringList &CmdArgs) const { - StringRef ArchName = getMachOToolChain().getMachOArchName(Args); - - // Derived from darwin_arch spec. - CmdArgs.push_back("-arch"); - CmdArgs.push_back(Args.MakeArgString(ArchName)); - - // FIXME: Is this needed anymore? - if (ArchName == "arm") - CmdArgs.push_back("-force_cpusubtype_ALL"); -} - -bool darwin::Linker::NeedsTempPath(const InputInfoList &Inputs) const { - // We only need to generate a temp path for LTO if we aren't compiling object - // files. When compiling source files, we run 'dsymutil' after linking. We - // don't run 'dsymutil' when compiling object files. - for (const auto &Input : Inputs) - if (Input.getType() != types::TY_Object) - return true; - - return false; -} - -/// \brief Pass -no_deduplicate to ld64 under certain conditions: -/// -/// - Either -O0 or -O1 is explicitly specified -/// - No -O option is specified *and* this is a compile+link (implicit -O0) -/// -/// Also do *not* add -no_deduplicate when no -O option is specified and this -/// is just a link (we can't imply -O0) -static bool shouldLinkerNotDedup(bool IsLinkerOnlyAction, const ArgList &Args) { - if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { - if (A->getOption().matches(options::OPT_O0)) - return true; - if (A->getOption().matches(options::OPT_O)) - return llvm::StringSwitch(A->getValue()) - .Case("1", true) - .Default(false); - return false; // OPT_Ofast & OPT_O4 - } - - if (!IsLinkerOnlyAction) // Implicit -O0 for compile+linker only. - return true; - return false; -} - -void darwin::Linker::AddLinkArgs(Compilation &C, const ArgList &Args, - ArgStringList &CmdArgs, - const InputInfoList &Inputs) const { - const Driver &D = getToolChain().getDriver(); - const toolchains::MachO &MachOTC = getMachOToolChain(); - - unsigned Version[5] = {0, 0, 0, 0, 0}; - if (Arg *A = Args.getLastArg(options::OPT_mlinker_version_EQ)) { - if (!Driver::GetReleaseVersion(A->getValue(), Version)) - D.Diag(diag::err_drv_invalid_version_number) << A->getAsString(Args); - } - - // Newer linkers support -demangle. Pass it if supported and not disabled by - // the user. - if (Version[0] >= 100 && !Args.hasArg(options::OPT_Z_Xlinker__no_demangle)) - CmdArgs.push_back("-demangle"); - - if (Args.hasArg(options::OPT_rdynamic) && Version[0] >= 137) - CmdArgs.push_back("-export_dynamic"); - - // If we are using App Extension restrictions, pass a flag to the linker - // telling it that the compiled code has been audited. 
- if (Args.hasFlag(options::OPT_fapplication_extension, - options::OPT_fno_application_extension, false)) - CmdArgs.push_back("-application_extension"); - - if (D.isUsingLTO()) { - // If we are using LTO, then automatically create a temporary file path for - // the linker to use, so that it's lifetime will extend past a possible - // dsymutil step. - if (Version[0] >= 116 && NeedsTempPath(Inputs)) { - const char *TmpPath = C.getArgs().MakeArgString( - D.GetTemporaryPath("cc", types::getTypeTempSuffix(types::TY_Object))); - C.addTempFile(TmpPath); - CmdArgs.push_back("-object_path_lto"); - CmdArgs.push_back(TmpPath); - } - } - - // Use -lto_library option to specify the libLTO.dylib path. Try to find - // it in clang installed libraries. ld64 will only look at this argument - // when it actually uses LTO, so libLTO.dylib only needs to exist at link - // time if ld64 decides that it needs to use LTO. - // Since this is passed unconditionally, ld64 will never look for libLTO.dylib - // next to it. That's ok since ld64 using a libLTO.dylib not matching the - // clang version won't work anyways. - if (Version[0] >= 133) { - // Search for libLTO in /../lib/libLTO.dylib - StringRef P = llvm::sys::path::parent_path(D.Dir); - SmallString<128> LibLTOPath(P); - llvm::sys::path::append(LibLTOPath, "lib"); - llvm::sys::path::append(LibLTOPath, "libLTO.dylib"); - CmdArgs.push_back("-lto_library"); - CmdArgs.push_back(C.getArgs().MakeArgString(LibLTOPath)); - } - - // ld64 version 262 and above run the deduplicate pass by default. - if (Version[0] >= 262 && shouldLinkerNotDedup(C.getJobs().empty(), Args)) - CmdArgs.push_back("-no_deduplicate"); - - // Derived from the "link" spec. - Args.AddAllArgs(CmdArgs, options::OPT_static); - if (!Args.hasArg(options::OPT_static)) - CmdArgs.push_back("-dynamic"); - if (Args.hasArg(options::OPT_fgnu_runtime)) { - // FIXME: gcc replaces -lobjc in forward args with -lobjc-gnu - // here. How do we wish to handle such things? - } - - if (!Args.hasArg(options::OPT_dynamiclib)) { - AddMachOArch(Args, CmdArgs); - // FIXME: Why do this only on this path? 
- Args.AddLastArg(CmdArgs, options::OPT_force__cpusubtype__ALL); - - Args.AddLastArg(CmdArgs, options::OPT_bundle); - Args.AddAllArgs(CmdArgs, options::OPT_bundle__loader); - Args.AddAllArgs(CmdArgs, options::OPT_client__name); - - Arg *A; - if ((A = Args.getLastArg(options::OPT_compatibility__version)) || - (A = Args.getLastArg(options::OPT_current__version)) || - (A = Args.getLastArg(options::OPT_install__name))) - D.Diag(diag::err_drv_argument_only_allowed_with) << A->getAsString(Args) - << "-dynamiclib"; - - Args.AddLastArg(CmdArgs, options::OPT_force__flat__namespace); - Args.AddLastArg(CmdArgs, options::OPT_keep__private__externs); - Args.AddLastArg(CmdArgs, options::OPT_private__bundle); - } else { - CmdArgs.push_back("-dylib"); - - Arg *A; - if ((A = Args.getLastArg(options::OPT_bundle)) || - (A = Args.getLastArg(options::OPT_bundle__loader)) || - (A = Args.getLastArg(options::OPT_client__name)) || - (A = Args.getLastArg(options::OPT_force__flat__namespace)) || - (A = Args.getLastArg(options::OPT_keep__private__externs)) || - (A = Args.getLastArg(options::OPT_private__bundle))) - D.Diag(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args) - << "-dynamiclib"; - - Args.AddAllArgsTranslated(CmdArgs, options::OPT_compatibility__version, - "-dylib_compatibility_version"); - Args.AddAllArgsTranslated(CmdArgs, options::OPT_current__version, - "-dylib_current_version"); - - AddMachOArch(Args, CmdArgs); - - Args.AddAllArgsTranslated(CmdArgs, options::OPT_install__name, - "-dylib_install_name"); - } - - Args.AddLastArg(CmdArgs, options::OPT_all__load); - Args.AddAllArgs(CmdArgs, options::OPT_allowable__client); - Args.AddLastArg(CmdArgs, options::OPT_bind__at__load); - if (MachOTC.isTargetIOSBased()) - Args.AddLastArg(CmdArgs, options::OPT_arch__errors__fatal); - Args.AddLastArg(CmdArgs, options::OPT_dead__strip); - Args.AddLastArg(CmdArgs, options::OPT_no__dead__strip__inits__and__terms); - Args.AddAllArgs(CmdArgs, options::OPT_dylib__file); - Args.AddLastArg(CmdArgs, options::OPT_dynamic); - Args.AddAllArgs(CmdArgs, options::OPT_exported__symbols__list); - Args.AddLastArg(CmdArgs, options::OPT_flat__namespace); - Args.AddAllArgs(CmdArgs, options::OPT_force__load); - Args.AddAllArgs(CmdArgs, options::OPT_headerpad__max__install__names); - Args.AddAllArgs(CmdArgs, options::OPT_image__base); - Args.AddAllArgs(CmdArgs, options::OPT_init); - - // Add the deployment target. - MachOTC.addMinVersionArgs(Args, CmdArgs); - - Args.AddLastArg(CmdArgs, options::OPT_nomultidefs); - Args.AddLastArg(CmdArgs, options::OPT_multi__module); - Args.AddLastArg(CmdArgs, options::OPT_single__module); - Args.AddAllArgs(CmdArgs, options::OPT_multiply__defined); - Args.AddAllArgs(CmdArgs, options::OPT_multiply__defined__unused); - - if (const Arg *A = - Args.getLastArg(options::OPT_fpie, options::OPT_fPIE, - options::OPT_fno_pie, options::OPT_fno_PIE)) { - if (A->getOption().matches(options::OPT_fpie) || - A->getOption().matches(options::OPT_fPIE)) - CmdArgs.push_back("-pie"); - else - CmdArgs.push_back("-no_pie"); - } - - // for embed-bitcode, use -bitcode_bundle in linker command - if (C.getDriver().embedBitcodeEnabled()) { - // Check if the toolchain supports bitcode build flow. 
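With -fembed-bitcode, each object carries its bitcode in an __LLVM,__bitcode section; ld64 then needs -bitcode_bundle to collect those sections into the output, so toolchains that cannot support the flow are diagnosed rather than silently producing a non-bitcode binary. For example (illustrative):

  clang -fembed-bitcode -c a.c
  clang -fembed-bitcode a.o -o a.out   # adds -bitcode_bundle to the ld64 line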
- if (MachOTC.SupportsEmbeddedBitcode()) - CmdArgs.push_back("-bitcode_bundle"); - else - D.Diag(diag::err_drv_bitcode_unsupported_on_toolchain); - } - - Args.AddLastArg(CmdArgs, options::OPT_prebind); - Args.AddLastArg(CmdArgs, options::OPT_noprebind); - Args.AddLastArg(CmdArgs, options::OPT_nofixprebinding); - Args.AddLastArg(CmdArgs, options::OPT_prebind__all__twolevel__modules); - Args.AddLastArg(CmdArgs, options::OPT_read__only__relocs); - Args.AddAllArgs(CmdArgs, options::OPT_sectcreate); - Args.AddAllArgs(CmdArgs, options::OPT_sectorder); - Args.AddAllArgs(CmdArgs, options::OPT_seg1addr); - Args.AddAllArgs(CmdArgs, options::OPT_segprot); - Args.AddAllArgs(CmdArgs, options::OPT_segaddr); - Args.AddAllArgs(CmdArgs, options::OPT_segs__read__only__addr); - Args.AddAllArgs(CmdArgs, options::OPT_segs__read__write__addr); - Args.AddAllArgs(CmdArgs, options::OPT_seg__addr__table); - Args.AddAllArgs(CmdArgs, options::OPT_seg__addr__table__filename); - Args.AddAllArgs(CmdArgs, options::OPT_sub__library); - Args.AddAllArgs(CmdArgs, options::OPT_sub__umbrella); - - // Give --sysroot= preference, over the Apple specific behavior to also use - // --isysroot as the syslibroot. - StringRef sysroot = C.getSysRoot(); - if (sysroot != "") { - CmdArgs.push_back("-syslibroot"); - CmdArgs.push_back(C.getArgs().MakeArgString(sysroot)); - } else if (const Arg *A = Args.getLastArg(options::OPT_isysroot)) { - CmdArgs.push_back("-syslibroot"); - CmdArgs.push_back(A->getValue()); - } - - Args.AddLastArg(CmdArgs, options::OPT_twolevel__namespace); - Args.AddLastArg(CmdArgs, options::OPT_twolevel__namespace__hints); - Args.AddAllArgs(CmdArgs, options::OPT_umbrella); - Args.AddAllArgs(CmdArgs, options::OPT_undefined); - Args.AddAllArgs(CmdArgs, options::OPT_unexported__symbols__list); - Args.AddAllArgs(CmdArgs, options::OPT_weak__reference__mismatches); - Args.AddLastArg(CmdArgs, options::OPT_X_Flag); - Args.AddAllArgs(CmdArgs, options::OPT_y); - Args.AddLastArg(CmdArgs, options::OPT_w); - Args.AddAllArgs(CmdArgs, options::OPT_pagezero__size); - Args.AddAllArgs(CmdArgs, options::OPT_segs__read__); - Args.AddLastArg(CmdArgs, options::OPT_seglinkedit); - Args.AddLastArg(CmdArgs, options::OPT_noseglinkedit); - Args.AddAllArgs(CmdArgs, options::OPT_sectalign); - Args.AddAllArgs(CmdArgs, options::OPT_sectobjectsymbols); - Args.AddAllArgs(CmdArgs, options::OPT_segcreate); - Args.AddLastArg(CmdArgs, options::OPT_whyload); - Args.AddLastArg(CmdArgs, options::OPT_whatsloaded); - Args.AddAllArgs(CmdArgs, options::OPT_dylinker__install__name); - Args.AddLastArg(CmdArgs, options::OPT_dylinker); - Args.AddLastArg(CmdArgs, options::OPT_Mach); -} - -void darwin::Linker::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - assert(Output.getType() == types::TY_Image && "Invalid linker output type."); - - // If the number of arguments surpasses the system limits, we will encode the - // input files in a separate file, shortening the command line. To this end, - // build a list of input file names that can be passed via a file with the - // -filelist linker option. - llvm::opt::ArgStringList InputFileList; - - // The logic here is derived from gcc's behavior; most of which - // comes from specs (starting with link_command). Consult gcc for - // more information. - ArgStringList CmdArgs; - - /// Hack(tm) to ignore linking errors when we are doing ARC migration. 
-  if (Args.hasArg(options::OPT_ccc_arcmt_check,
-                  options::OPT_ccc_arcmt_migrate)) {
-    for (const auto &Arg : Args)
-      Arg->claim();
-    const char *Exec =
-        Args.MakeArgString(getToolChain().GetProgramPath("touch"));
-    CmdArgs.push_back(Output.getFilename());
-    C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, None));
-    return;
-  }
-
-  // I'm not sure why this particular decomposition exists in gcc, but
-  // we follow suit for ease of comparison.
-  AddLinkArgs(C, Args, CmdArgs, Inputs);
-
-  // For LTO, pass the name of the optimization record file.
-  if (Args.hasFlag(options::OPT_fsave_optimization_record,
-                   options::OPT_fno_save_optimization_record, false)) {
-    CmdArgs.push_back("-mllvm");
-    CmdArgs.push_back("-lto-pass-remarks-output");
-    CmdArgs.push_back("-mllvm");
-
-    SmallString<128> F;
-    F = Output.getFilename();
-    F += ".opt.yaml";
-    CmdArgs.push_back(Args.MakeArgString(F));
-
-    if (getLastProfileUseArg(Args)) {
-      CmdArgs.push_back("-mllvm");
-      CmdArgs.push_back("-lto-pass-remarks-with-hotness");
-    }
-  }
-
-  // It seems that the 'e' option is completely ignored for dynamic executables
-  // (the default), and with static executables, the last one wins, as expected.
-  Args.AddAllArgs(CmdArgs, {options::OPT_d_Flag, options::OPT_s, options::OPT_t,
-                            options::OPT_Z_Flag, options::OPT_u_Group,
-                            options::OPT_e, options::OPT_r});
-
-  // Forward -ObjC when either -ObjC or -ObjC++ is used, to force loading
-  // members of static archive libraries which implement Objective-C classes or
-  // categories.
-  if (Args.hasArg(options::OPT_ObjC) || Args.hasArg(options::OPT_ObjCXX))
-    CmdArgs.push_back("-ObjC");
-
-  CmdArgs.push_back("-o");
-  CmdArgs.push_back(Output.getFilename());
-
-  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles))
-    getMachOToolChain().addStartObjectFileArgs(Args, CmdArgs);
-
-  // SafeStack requires its own runtime libraries.
-  // These libraries should be linked first, to make sure the
-  // __safestack_init constructor executes before everything else.
-  if (getToolChain().getSanitizerArgs().needsSafeStackRt()) {
-    getMachOToolChain().AddLinkRuntimeLib(Args, CmdArgs,
-                                          "libclang_rt.safestack_osx.a",
-                                          /*AlwaysLink=*/true);
-  }
-
-  Args.AddAllArgs(CmdArgs, options::OPT_L);
-
-  AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
-  // Build the input file for -filelist (list of linker input files) in case
-  // we need it later.
-  for (const auto &II : Inputs) {
-    if (!II.isFilename()) {
-      // This is a linker input argument.
-      // We cannot mix input arguments and file names in a -filelist input,
-      // thus we prematurely stop our list (remaining files shall be passed
-      // as arguments).
-      if (InputFileList.size() > 0)
-        break;
-
-      continue;
-    }
-
-    InputFileList.push_back(II.getFilename());
-  }
-
-  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs))
-    addOpenMPRuntime(CmdArgs, getToolChain(), Args);
-
-  if (isObjCRuntimeLinked(Args) &&
-      !Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
-    // We use the arclite library for both ARC and subscripting support.
-    getMachOToolChain().AddLinkARCArgs(Args, CmdArgs);
-
-    CmdArgs.push_back("-framework");
-    CmdArgs.push_back("Foundation");
-    // Link libobjc.
- CmdArgs.push_back("-lobjc"); - } - - if (LinkingOutput) { - CmdArgs.push_back("-arch_multiple"); - CmdArgs.push_back("-final_output"); - CmdArgs.push_back(LinkingOutput); - } - - if (Args.hasArg(options::OPT_fnested_functions)) - CmdArgs.push_back("-allow_stack_execute"); - - getMachOToolChain().addProfileRTLibs(Args, CmdArgs); - - if (unsigned Parallelism = - getLTOParallelism(Args, getToolChain().getDriver())) { - CmdArgs.push_back("-mllvm"); - CmdArgs.push_back( - Args.MakeArgString(Twine("-threads=") + llvm::to_string(Parallelism))); - } - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { - if (getToolChain().getDriver().CCCIsCXX()) - getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs); - - // link_ssp spec is empty. - - // Let the tool chain choose which runtime library to link. - getMachOToolChain().AddLinkRuntimeLibArgs(Args, CmdArgs); - } - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { - // endfile_spec is empty. - } - - Args.AddAllArgs(CmdArgs, options::OPT_T_Group); - Args.AddAllArgs(CmdArgs, options::OPT_F); - - // -iframework should be forwarded as -F. - for (const Arg *A : Args.filtered(options::OPT_iframework)) - CmdArgs.push_back(Args.MakeArgString(std::string("-F") + A->getValue())); - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { - if (Arg *A = Args.getLastArg(options::OPT_fveclib)) { - if (A->getValue() == StringRef("Accelerate")) { - CmdArgs.push_back("-framework"); - CmdArgs.push_back("Accelerate"); - } - } - } - - const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath()); - std::unique_ptr Cmd = - llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs); - Cmd->setInputFileList(std::move(InputFileList)); - C.addCommand(std::move(Cmd)); -} - -void darwin::Lipo::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - ArgStringList CmdArgs; - - CmdArgs.push_back("-create"); - assert(Output.isFilename() && "Unexpected lipo output."); - - CmdArgs.push_back("-output"); - CmdArgs.push_back(Output.getFilename()); - - for (const auto &II : Inputs) { - assert(II.isFilename() && "Unexpected lipo input."); - CmdArgs.push_back(II.getFilename()); - } - - const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("lipo")); - C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); -} - -void darwin::Dsymutil::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - ArgStringList CmdArgs; - - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - - assert(Inputs.size() == 1 && "Unable to handle multiple inputs."); - const InputInfo &Input = Inputs[0]; - assert(Input.isFilename() && "Unexpected dsymutil input."); - CmdArgs.push_back(Input.getFilename()); - - const char *Exec = - Args.MakeArgString(getToolChain().GetProgramPath("dsymutil")); - C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); -} - -void darwin::VerifyDebug::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - ArgStringList CmdArgs; - CmdArgs.push_back("--verify"); - CmdArgs.push_back("--debug-info"); - CmdArgs.push_back("--eh-frame"); - CmdArgs.push_back("--quiet"); - - assert(Inputs.size() == 1 && "Unable to handle multiple inputs."); - const 
InputInfo &Input = Inputs[0]; - assert(Input.isFilename() && "Unexpected verify input"); - - // Grabbing the output of the earlier dsymutil run. - CmdArgs.push_back(Input.getFilename()); - - const char *Exec = - Args.MakeArgString(getToolChain().GetProgramPath("dwarfdump")); - C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); -} - -void solaris::Assembler::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - claimNoWarnArgs(Args); - ArgStringList CmdArgs; - - Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); - - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - - for (const auto &II : Inputs) - CmdArgs.push_back(II.getFilename()); - - const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); - C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); -} - -void solaris::Linker::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - ArgStringList CmdArgs; - - // Demangle C++ names in errors - CmdArgs.push_back("-C"); - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_shared)) { - CmdArgs.push_back("-e"); - CmdArgs.push_back("_start"); - } - - if (Args.hasArg(options::OPT_static)) { - CmdArgs.push_back("-Bstatic"); - CmdArgs.push_back("-dn"); - } else { - CmdArgs.push_back("-Bdynamic"); - if (Args.hasArg(options::OPT_shared)) { - CmdArgs.push_back("-shared"); - } else { - CmdArgs.push_back("--dynamic-linker"); - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("ld.so.1"))); - } - } - - if (Output.isFilename()) { - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - } else { - assert(Output.isNothing() && "Invalid output."); - } - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { - if (!Args.hasArg(options::OPT_shared)) - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("crt1.o"))); - - CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crti.o"))); - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("values-Xa.o"))); - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o"))); - } - - getToolChain().AddFilePathLibArgs(Args, CmdArgs); - - Args.AddAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group, - options::OPT_e, options::OPT_r}); - - AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { - if (getToolChain().getDriver().CCCIsCXX()) - getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs); - CmdArgs.push_back("-lgcc_s"); - CmdArgs.push_back("-lc"); - if (!Args.hasArg(options::OPT_shared)) { - CmdArgs.push_back("-lgcc"); - CmdArgs.push_back("-lm"); - } - } - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("crtend.o"))); - } - CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crtn.o"))); - - getToolChain().addProfileRTLibs(Args, CmdArgs); - - const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath()); - C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); -} - -void openbsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char 
*LinkingOutput) const { - claimNoWarnArgs(Args); - ArgStringList CmdArgs; - - switch (getToolChain().getArch()) { - case llvm::Triple::x86: - // When building 32-bit code on OpenBSD/amd64, we have to explicitly - // instruct as in the base system to assemble 32-bit code. - CmdArgs.push_back("--32"); - break; - - case llvm::Triple::ppc: - CmdArgs.push_back("-mppc"); - CmdArgs.push_back("-many"); - break; - - case llvm::Triple::sparc: - case llvm::Triple::sparcel: { - CmdArgs.push_back("-32"); - std::string CPU = getCPUName(Args, getToolChain().getTriple()); - CmdArgs.push_back(getSparcAsmModeForCPU(CPU, getToolChain().getTriple())); - AddAssemblerKPIC(getToolChain(), Args, CmdArgs); - break; - } - - case llvm::Triple::sparcv9: { - CmdArgs.push_back("-64"); - std::string CPU = getCPUName(Args, getToolChain().getTriple()); - CmdArgs.push_back(getSparcAsmModeForCPU(CPU, getToolChain().getTriple())); - AddAssemblerKPIC(getToolChain(), Args, CmdArgs); - break; - } - - case llvm::Triple::mips64: - case llvm::Triple::mips64el: { - StringRef CPUName; - StringRef ABIName; - mips::getMipsCPUAndABI(Args, getToolChain().getTriple(), CPUName, ABIName); - - CmdArgs.push_back("-mabi"); - CmdArgs.push_back(getGnuCompatibleMipsABIName(ABIName).data()); - - if (getToolChain().getArch() == llvm::Triple::mips64) - CmdArgs.push_back("-EB"); - else - CmdArgs.push_back("-EL"); - - AddAssemblerKPIC(getToolChain(), Args, CmdArgs); - break; - } - - default: - break; - } - - Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); - - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - - for (const auto &II : Inputs) - CmdArgs.push_back(II.getFilename()); - - const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); - C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); -} - -void openbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - const Driver &D = getToolChain().getDriver(); - ArgStringList CmdArgs; - - // Silence warning for "clang -g foo.o -o foo" - Args.ClaimAllArgs(options::OPT_g_Group); - // and "clang -emit-llvm foo.o -o foo" - Args.ClaimAllArgs(options::OPT_emit_llvm); - // and for "clang -w foo.o -o foo". Other warning options are already - // handled somewhere else. 
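// ==== [editorial sketch; not from Tools.cpp. A toy model of why the linker
// tool "claims" -g/-emit-llvm/-w: the driver warns about any argument that
// no tool claimed, so claiming an option suppresses that warning.]
#include <vector>
struct ToyArg {
  const char *Spelling;
  bool Claimed = false;
  void claim() { Claimed = true; }
};
static int countUnusedArgs(const std::vector<ToyArg> &Args) {
  int Unused = 0;
  for (const ToyArg &A : Args)
    if (!A.Claimed) // would print "argument unused during compilation: ..."
      ++Unused;
  return Unused;
}
// ==== [end sketch]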
- Args.ClaimAllArgs(options::OPT_w); - - if (getToolChain().getArch() == llvm::Triple::mips64) - CmdArgs.push_back("-EB"); - else if (getToolChain().getArch() == llvm::Triple::mips64el) - CmdArgs.push_back("-EL"); - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_shared)) { - CmdArgs.push_back("-e"); - CmdArgs.push_back("__start"); - } - - if (Args.hasArg(options::OPT_static)) { - CmdArgs.push_back("-Bstatic"); - } else { - if (Args.hasArg(options::OPT_rdynamic)) - CmdArgs.push_back("-export-dynamic"); - CmdArgs.push_back("--eh-frame-hdr"); - CmdArgs.push_back("-Bdynamic"); - if (Args.hasArg(options::OPT_shared)) { - CmdArgs.push_back("-shared"); - } else { - CmdArgs.push_back("-dynamic-linker"); - CmdArgs.push_back("/usr/libexec/ld.so"); - } - } - - if (Args.hasArg(options::OPT_nopie)) - CmdArgs.push_back("-nopie"); - - if (Output.isFilename()) { - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - } else { - assert(Output.isNothing() && "Invalid output."); - } - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { - if (!Args.hasArg(options::OPT_shared)) { - if (Args.hasArg(options::OPT_pg)) - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("gcrt0.o"))); - else - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("crt0.o"))); - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o"))); - } else { - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("crtbeginS.o"))); - } - } - - std::string Triple = getToolChain().getTripleString(); - if (Triple.substr(0, 6) == "x86_64") - Triple.replace(0, 6, "amd64"); - CmdArgs.push_back( - Args.MakeArgString("-L/usr/lib/gcc-lib/" + Triple + "/4.2.1")); - - Args.AddAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group, - options::OPT_e, options::OPT_s, options::OPT_t, - options::OPT_Z_Flag, options::OPT_r}); - - AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { - if (D.CCCIsCXX()) { - getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs); - if (Args.hasArg(options::OPT_pg)) - CmdArgs.push_back("-lm_p"); - else - CmdArgs.push_back("-lm"); - } - - // FIXME: For some reason GCC passes -lgcc before adding - // the default system libraries. Just mimic this for now. 
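// ==== [editorial sketch; not from Tools.cpp. The "_p" pattern used in the
// library selections that follow: with -pg, BSD-style toolchains link the
// profiled variants of the system libraries.]
static const char *profiledVariant(bool ProfileGen, const char *Normal,
                                   const char *Profiled) {
  return ProfileGen ? Profiled : Normal;
}
// e.g. mirroring the -lpthread choice below:
//   CmdArgs.push_back(profiledVariant(Args.hasArg(options::OPT_pg),
//                                     "-lpthread", "-lpthread_p"));
// ==== [end sketch]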
- CmdArgs.push_back("-lgcc"); - - if (Args.hasArg(options::OPT_pthread)) { - if (!Args.hasArg(options::OPT_shared) && Args.hasArg(options::OPT_pg)) - CmdArgs.push_back("-lpthread_p"); - else - CmdArgs.push_back("-lpthread"); - } - - if (!Args.hasArg(options::OPT_shared)) { - if (Args.hasArg(options::OPT_pg)) - CmdArgs.push_back("-lc_p"); - else - CmdArgs.push_back("-lc"); - } - - CmdArgs.push_back("-lgcc"); - } - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { - if (!Args.hasArg(options::OPT_shared)) - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("crtend.o"))); - else - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("crtendS.o"))); - } - - const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath()); - C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); -} - -void bitrig::Assembler::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - claimNoWarnArgs(Args); - ArgStringList CmdArgs; - - Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); - - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - - for (const auto &II : Inputs) - CmdArgs.push_back(II.getFilename()); - - const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); - C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); -} - -void bitrig::Linker::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - const Driver &D = getToolChain().getDriver(); - ArgStringList CmdArgs; - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_shared)) { - CmdArgs.push_back("-e"); - CmdArgs.push_back("__start"); - } - - if (Args.hasArg(options::OPT_static)) { - CmdArgs.push_back("-Bstatic"); - } else { - if (Args.hasArg(options::OPT_rdynamic)) - CmdArgs.push_back("-export-dynamic"); - CmdArgs.push_back("--eh-frame-hdr"); - CmdArgs.push_back("-Bdynamic"); - if (Args.hasArg(options::OPT_shared)) { - CmdArgs.push_back("-shared"); - } else { - CmdArgs.push_back("-dynamic-linker"); - CmdArgs.push_back("/usr/libexec/ld.so"); - } - } - - if (Output.isFilename()) { - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - } else { - assert(Output.isNothing() && "Invalid output."); - } - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { - if (!Args.hasArg(options::OPT_shared)) { - if (Args.hasArg(options::OPT_pg)) - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("gcrt0.o"))); - else - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("crt0.o"))); - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o"))); - } else { - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("crtbeginS.o"))); - } - } - - Args.AddAllArgs(CmdArgs, - {options::OPT_L, options::OPT_T_Group, options::OPT_e}); - - AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { - if (D.CCCIsCXX()) { - getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs); - if (Args.hasArg(options::OPT_pg)) - CmdArgs.push_back("-lm_p"); - else - CmdArgs.push_back("-lm"); - } - - if (Args.hasArg(options::OPT_pthread)) { - if (!Args.hasArg(options::OPT_shared) && Args.hasArg(options::OPT_pg)) - CmdArgs.push_back("-lpthread_p"); - else - 
CmdArgs.push_back("-lpthread"); - } - - if (!Args.hasArg(options::OPT_shared)) { - if (Args.hasArg(options::OPT_pg)) - CmdArgs.push_back("-lc_p"); - else - CmdArgs.push_back("-lc"); - } - - StringRef MyArch; - switch (getToolChain().getArch()) { - case llvm::Triple::arm: - MyArch = "arm"; - break; - case llvm::Triple::x86: - MyArch = "i386"; - break; - case llvm::Triple::x86_64: - MyArch = "amd64"; - break; - default: - llvm_unreachable("Unsupported architecture"); - } - CmdArgs.push_back(Args.MakeArgString("-lclang_rt." + MyArch)); - } - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { - if (!Args.hasArg(options::OPT_shared)) - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("crtend.o"))); - else - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("crtendS.o"))); - } - - const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath()); - C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); -} - -void freebsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - claimNoWarnArgs(Args); - ArgStringList CmdArgs; - - // When building 32-bit code on FreeBSD/amd64, we have to explicitly - // instruct as in the base system to assemble 32-bit code. - switch (getToolChain().getArch()) { - default: - break; - case llvm::Triple::x86: - CmdArgs.push_back("--32"); - break; - case llvm::Triple::ppc: - CmdArgs.push_back("-a32"); - break; - case llvm::Triple::mips: - case llvm::Triple::mipsel: - case llvm::Triple::mips64: - case llvm::Triple::mips64el: { - StringRef CPUName; - StringRef ABIName; - mips::getMipsCPUAndABI(Args, getToolChain().getTriple(), CPUName, ABIName); - - CmdArgs.push_back("-march"); - CmdArgs.push_back(CPUName.data()); - - CmdArgs.push_back("-mabi"); - CmdArgs.push_back(getGnuCompatibleMipsABIName(ABIName).data()); - - if (getToolChain().getArch() == llvm::Triple::mips || - getToolChain().getArch() == llvm::Triple::mips64) - CmdArgs.push_back("-EB"); - else - CmdArgs.push_back("-EL"); - - if (Arg *A = Args.getLastArg(options::OPT_G)) { - StringRef v = A->getValue(); - CmdArgs.push_back(Args.MakeArgString("-G" + v)); - A->claim(); - } - - AddAssemblerKPIC(getToolChain(), Args, CmdArgs); - break; - } - case llvm::Triple::arm: - case llvm::Triple::armeb: - case llvm::Triple::thumb: - case llvm::Triple::thumbeb: { - arm::FloatABI ABI = arm::getARMFloatABI(getToolChain(), Args); - - if (ABI == arm::FloatABI::Hard) - CmdArgs.push_back("-mfpu=vfp"); - else - CmdArgs.push_back("-mfpu=softvfp"); - - switch (getToolChain().getTriple().getEnvironment()) { - case llvm::Triple::GNUEABIHF: - case llvm::Triple::GNUEABI: - case llvm::Triple::EABI: - CmdArgs.push_back("-meabi=5"); - break; - - default: - CmdArgs.push_back("-matpcs"); - } - break; - } - case llvm::Triple::sparc: - case llvm::Triple::sparcel: - case llvm::Triple::sparcv9: { - std::string CPU = getCPUName(Args, getToolChain().getTriple()); - CmdArgs.push_back(getSparcAsmModeForCPU(CPU, getToolChain().getTriple())); - AddAssemblerKPIC(getToolChain(), Args, CmdArgs); - break; - } - } - - Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); - - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - - for (const auto &II : Inputs) - CmdArgs.push_back(II.getFilename()); - - const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); - C.addCommand(llvm::make_unique(JA, *this, Exec, 
-                                           CmdArgs, Inputs));
-}
-
-void freebsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
-                                   const InputInfo &Output,
-                                   const InputInfoList &Inputs,
-                                   const ArgList &Args,
-                                   const char *LinkingOutput) const {
-  const toolchains::FreeBSD &ToolChain =
-      static_cast<const toolchains::FreeBSD &>(getToolChain());
-  const Driver &D = ToolChain.getDriver();
-  const llvm::Triple::ArchType Arch = ToolChain.getArch();
-  const bool IsPIE =
-      !Args.hasArg(options::OPT_shared) &&
-      (Args.hasArg(options::OPT_pie) || ToolChain.isPIEDefault());
-  ArgStringList CmdArgs;
-
-  // Silence warning for "clang -g foo.o -o foo"
-  Args.ClaimAllArgs(options::OPT_g_Group);
-  // and "clang -emit-llvm foo.o -o foo"
-  Args.ClaimAllArgs(options::OPT_emit_llvm);
-  // and for "clang -w foo.o -o foo". Other warning options are already
-  // handled somewhere else.
-  Args.ClaimAllArgs(options::OPT_w);
-
-  if (!D.SysRoot.empty())
-    CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));
-
-  if (IsPIE)
-    CmdArgs.push_back("-pie");
-
-  CmdArgs.push_back("--eh-frame-hdr");
-  if (Args.hasArg(options::OPT_static)) {
-    CmdArgs.push_back("-Bstatic");
-  } else {
-    if (Args.hasArg(options::OPT_rdynamic))
-      CmdArgs.push_back("-export-dynamic");
-    if (Args.hasArg(options::OPT_shared)) {
-      CmdArgs.push_back("-Bshareable");
-    } else {
-      CmdArgs.push_back("-dynamic-linker");
-      CmdArgs.push_back("/libexec/ld-elf.so.1");
-    }
-    if (ToolChain.getTriple().getOSMajorVersion() >= 9) {
-      if (Arch == llvm::Triple::arm || Arch == llvm::Triple::sparc ||
-          Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64) {
-        CmdArgs.push_back("--hash-style=both");
-      }
-    }
-    CmdArgs.push_back("--enable-new-dtags");
-  }
-
-  // When building 32-bit code on FreeBSD/amd64, we have to explicitly
-  // instruct ld in the base system to link 32-bit code.
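// ==== [editorial sketch; not from Tools.cpp. The "-m <emulation>" choice
// that follows, reduced to a standalone helper.]
#include <vector>
static void addFreeBSDEmulation(std::vector<const char *> &Argv, bool IsX86,
                                bool IsPPC32) {
  if (IsX86) {
    Argv.push_back("-m");
    Argv.push_back("elf_i386_fbsd"); // 32-bit x86 output from the base ld
  } else if (IsPPC32) {
    Argv.push_back("-m");
    Argv.push_back("elf32ppc_fbsd");
  }
}
// ==== [end sketch]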
- if (Arch == llvm::Triple::x86) { - CmdArgs.push_back("-m"); - CmdArgs.push_back("elf_i386_fbsd"); - } - - if (Arch == llvm::Triple::ppc) { - CmdArgs.push_back("-m"); - CmdArgs.push_back("elf32ppc_fbsd"); - } - - if (Arg *A = Args.getLastArg(options::OPT_G)) { - if (ToolChain.getArch() == llvm::Triple::mips || - ToolChain.getArch() == llvm::Triple::mipsel || - ToolChain.getArch() == llvm::Triple::mips64 || - ToolChain.getArch() == llvm::Triple::mips64el) { - StringRef v = A->getValue(); - CmdArgs.push_back(Args.MakeArgString("-G" + v)); - A->claim(); - } - } - - if (Output.isFilename()) { - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - } else { - assert(Output.isNothing() && "Invalid output."); - } - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { - const char *crt1 = nullptr; - if (!Args.hasArg(options::OPT_shared)) { - if (Args.hasArg(options::OPT_pg)) - crt1 = "gcrt1.o"; - else if (IsPIE) - crt1 = "Scrt1.o"; - else - crt1 = "crt1.o"; - } - if (crt1) - CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crt1))); - - CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crti.o"))); - - const char *crtbegin = nullptr; - if (Args.hasArg(options::OPT_static)) - crtbegin = "crtbeginT.o"; - else if (Args.hasArg(options::OPT_shared) || IsPIE) - crtbegin = "crtbeginS.o"; - else - crtbegin = "crtbegin.o"; - - CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtbegin))); - } - - Args.AddAllArgs(CmdArgs, options::OPT_L); - ToolChain.AddFilePathLibArgs(Args, CmdArgs); - Args.AddAllArgs(CmdArgs, options::OPT_T_Group); - Args.AddAllArgs(CmdArgs, options::OPT_e); - Args.AddAllArgs(CmdArgs, options::OPT_s); - Args.AddAllArgs(CmdArgs, options::OPT_t); - Args.AddAllArgs(CmdArgs, options::OPT_Z_Flag); - Args.AddAllArgs(CmdArgs, options::OPT_r); - - if (D.isUsingLTO()) - AddGoldPlugin(ToolChain, Args, CmdArgs, D.getLTOMode() == LTOK_Thin, D); - - bool NeedsSanitizerDeps = addSanitizerRuntimes(ToolChain, Args, CmdArgs); - AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { - addOpenMPRuntime(CmdArgs, ToolChain, Args); - if (D.CCCIsCXX()) { - ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs); - if (Args.hasArg(options::OPT_pg)) - CmdArgs.push_back("-lm_p"); - else - CmdArgs.push_back("-lm"); - } - if (NeedsSanitizerDeps) - linkSanitizerRuntimeDeps(ToolChain, CmdArgs); - // FIXME: For some reason GCC passes -lgcc and -lgcc_s before adding - // the default system libraries. Just mimic this for now. 
- if (Args.hasArg(options::OPT_pg)) - CmdArgs.push_back("-lgcc_p"); - else - CmdArgs.push_back("-lgcc"); - if (Args.hasArg(options::OPT_static)) { - CmdArgs.push_back("-lgcc_eh"); - } else if (Args.hasArg(options::OPT_pg)) { - CmdArgs.push_back("-lgcc_eh_p"); - } else { - CmdArgs.push_back("--as-needed"); - CmdArgs.push_back("-lgcc_s"); - CmdArgs.push_back("--no-as-needed"); - } - - if (Args.hasArg(options::OPT_pthread)) { - if (Args.hasArg(options::OPT_pg)) - CmdArgs.push_back("-lpthread_p"); - else - CmdArgs.push_back("-lpthread"); - } - - if (Args.hasArg(options::OPT_pg)) { - if (Args.hasArg(options::OPT_shared)) - CmdArgs.push_back("-lc"); - else - CmdArgs.push_back("-lc_p"); - CmdArgs.push_back("-lgcc_p"); - } else { - CmdArgs.push_back("-lc"); - CmdArgs.push_back("-lgcc"); - } - - if (Args.hasArg(options::OPT_static)) { - CmdArgs.push_back("-lgcc_eh"); - } else if (Args.hasArg(options::OPT_pg)) { - CmdArgs.push_back("-lgcc_eh_p"); - } else { - CmdArgs.push_back("--as-needed"); - CmdArgs.push_back("-lgcc_s"); - CmdArgs.push_back("--no-as-needed"); - } - } - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { - if (Args.hasArg(options::OPT_shared) || IsPIE) - CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtendS.o"))); - else - CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtend.o"))); - CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtn.o"))); - } - - ToolChain.addProfileRTLibs(Args, CmdArgs); - - const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath()); - C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); -} - -void netbsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - claimNoWarnArgs(Args); - ArgStringList CmdArgs; - - // GNU as needs different flags for creating the correct output format - // on architectures with different ABIs or optional feature sets. 
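// ==== [editorial sketch; not from Tools.cpp. The shape of the
// per-architecture dispatch that follows, with an enum standing in for
// llvm::Triple::ArchType.]
#include <vector>
enum class ToyArch { X86, Sparc, SparcV9, Other };
static void addAsModeFlags(std::vector<const char *> &Argv, ToyArch A) {
  switch (A) {
  case ToyArch::X86:     Argv.push_back("--32"); break; // 32-bit x86 code
  case ToyArch::Sparc:   Argv.push_back("-32");  break;
  case ToyArch::SparcV9: Argv.push_back("-64");  break;
  case ToyArch::Other:   break; // GNU as defaults are fine
  }
}
// ==== [end sketch]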
- switch (getToolChain().getArch()) { - case llvm::Triple::x86: - CmdArgs.push_back("--32"); - break; - case llvm::Triple::arm: - case llvm::Triple::armeb: - case llvm::Triple::thumb: - case llvm::Triple::thumbeb: { - StringRef MArch, MCPU; - getARMArchCPUFromArgs(Args, MArch, MCPU, /*FromAs*/ true); - std::string Arch = - arm::getARMTargetCPU(MCPU, MArch, getToolChain().getTriple()); - CmdArgs.push_back(Args.MakeArgString("-mcpu=" + Arch)); - break; - } - - case llvm::Triple::mips: - case llvm::Triple::mipsel: - case llvm::Triple::mips64: - case llvm::Triple::mips64el: { - StringRef CPUName; - StringRef ABIName; - mips::getMipsCPUAndABI(Args, getToolChain().getTriple(), CPUName, ABIName); - - CmdArgs.push_back("-march"); - CmdArgs.push_back(CPUName.data()); - - CmdArgs.push_back("-mabi"); - CmdArgs.push_back(getGnuCompatibleMipsABIName(ABIName).data()); - - if (getToolChain().getArch() == llvm::Triple::mips || - getToolChain().getArch() == llvm::Triple::mips64) - CmdArgs.push_back("-EB"); - else - CmdArgs.push_back("-EL"); - - AddAssemblerKPIC(getToolChain(), Args, CmdArgs); - break; - } - - case llvm::Triple::sparc: - case llvm::Triple::sparcel: { - CmdArgs.push_back("-32"); - std::string CPU = getCPUName(Args, getToolChain().getTriple()); - CmdArgs.push_back(getSparcAsmModeForCPU(CPU, getToolChain().getTriple())); - AddAssemblerKPIC(getToolChain(), Args, CmdArgs); - break; - } - - case llvm::Triple::sparcv9: { - CmdArgs.push_back("-64"); - std::string CPU = getCPUName(Args, getToolChain().getTriple()); - CmdArgs.push_back(getSparcAsmModeForCPU(CPU, getToolChain().getTriple())); - AddAssemblerKPIC(getToolChain(), Args, CmdArgs); - break; - } - - default: - break; - } - - Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); - - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - - for (const auto &II : Inputs) - CmdArgs.push_back(II.getFilename()); - - const char *Exec = Args.MakeArgString((getToolChain().GetProgramPath("as"))); - C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); -} - -void netbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - const Driver &D = getToolChain().getDriver(); - ArgStringList CmdArgs; - - if (!D.SysRoot.empty()) - CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot)); - - CmdArgs.push_back("--eh-frame-hdr"); - if (Args.hasArg(options::OPT_static)) { - CmdArgs.push_back("-Bstatic"); - } else { - if (Args.hasArg(options::OPT_rdynamic)) - CmdArgs.push_back("-export-dynamic"); - if (Args.hasArg(options::OPT_shared)) { - CmdArgs.push_back("-Bshareable"); - } else { - Args.AddAllArgs(CmdArgs, options::OPT_pie); - CmdArgs.push_back("-dynamic-linker"); - CmdArgs.push_back("/libexec/ld.elf_so"); - } - } - - // Many NetBSD architectures support more than one ABI. - // Determine the correct emulation for ld. 
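// ==== [editorial sketch; not from Tools.cpp. How the NetBSD ARM emulation
// names in the switch below are composed: a "b" suffix on "armelf" for
// big-endian targets, plus an optional EABI/EABIHF suffix.]
#include <string>
static std::string netbsdArmEmulation(bool BigEndian, bool EABI,
                                      bool HardFloat) {
  std::string Em = BigEndian ? "armelfb_nbsd" : "armelf_nbsd";
  if (EABI)
    Em += HardFloat ? "_eabihf" : "_eabi";
  return Em; // e.g. "armelf_nbsd_eabihf"
}
// ==== [end sketch]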
- switch (getToolChain().getArch()) { - case llvm::Triple::x86: - CmdArgs.push_back("-m"); - CmdArgs.push_back("elf_i386"); - break; - case llvm::Triple::arm: - case llvm::Triple::thumb: - CmdArgs.push_back("-m"); - switch (getToolChain().getTriple().getEnvironment()) { - case llvm::Triple::EABI: - case llvm::Triple::GNUEABI: - CmdArgs.push_back("armelf_nbsd_eabi"); - break; - case llvm::Triple::EABIHF: - case llvm::Triple::GNUEABIHF: - CmdArgs.push_back("armelf_nbsd_eabihf"); - break; - default: - CmdArgs.push_back("armelf_nbsd"); - break; - } - break; - case llvm::Triple::armeb: - case llvm::Triple::thumbeb: - arm::appendEBLinkFlags(Args, CmdArgs, getToolChain().getEffectiveTriple()); - CmdArgs.push_back("-m"); - switch (getToolChain().getTriple().getEnvironment()) { - case llvm::Triple::EABI: - case llvm::Triple::GNUEABI: - CmdArgs.push_back("armelfb_nbsd_eabi"); - break; - case llvm::Triple::EABIHF: - case llvm::Triple::GNUEABIHF: - CmdArgs.push_back("armelfb_nbsd_eabihf"); - break; - default: - CmdArgs.push_back("armelfb_nbsd"); - break; - } - break; - case llvm::Triple::mips64: - case llvm::Triple::mips64el: - if (mips::hasMipsAbiArg(Args, "32")) { - CmdArgs.push_back("-m"); - if (getToolChain().getArch() == llvm::Triple::mips64) - CmdArgs.push_back("elf32btsmip"); - else - CmdArgs.push_back("elf32ltsmip"); - } else if (mips::hasMipsAbiArg(Args, "64")) { - CmdArgs.push_back("-m"); - if (getToolChain().getArch() == llvm::Triple::mips64) - CmdArgs.push_back("elf64btsmip"); - else - CmdArgs.push_back("elf64ltsmip"); - } - break; - case llvm::Triple::ppc: - CmdArgs.push_back("-m"); - CmdArgs.push_back("elf32ppc_nbsd"); - break; - - case llvm::Triple::ppc64: - case llvm::Triple::ppc64le: - CmdArgs.push_back("-m"); - CmdArgs.push_back("elf64ppc"); - break; - - case llvm::Triple::sparc: - CmdArgs.push_back("-m"); - CmdArgs.push_back("elf32_sparc"); - break; - - case llvm::Triple::sparcv9: - CmdArgs.push_back("-m"); - CmdArgs.push_back("elf64_sparc"); - break; - - default: - break; - } - - if (Output.isFilename()) { - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - } else { - assert(Output.isNothing() && "Invalid output."); - } - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { - if (!Args.hasArg(options::OPT_shared)) { - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("crt0.o"))); - } - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("crti.o"))); - if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie)) { - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("crtbeginS.o"))); - } else { - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o"))); - } - } - - Args.AddAllArgs(CmdArgs, options::OPT_L); - Args.AddAllArgs(CmdArgs, options::OPT_T_Group); - Args.AddAllArgs(CmdArgs, options::OPT_e); - Args.AddAllArgs(CmdArgs, options::OPT_s); - Args.AddAllArgs(CmdArgs, options::OPT_t); - Args.AddAllArgs(CmdArgs, options::OPT_Z_Flag); - Args.AddAllArgs(CmdArgs, options::OPT_r); - - AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); - - unsigned Major, Minor, Micro; - getToolChain().getTriple().getOSVersion(Major, Minor, Micro); - bool useLibgcc = true; - if (Major >= 7 || Major == 0) { - switch (getToolChain().getArch()) { - case llvm::Triple::aarch64: - case llvm::Triple::arm: - case llvm::Triple::armeb: - case llvm::Triple::thumb: - case llvm::Triple::thumbeb: - case llvm::Triple::ppc: - case llvm::Triple::ppc64: - case llvm::Triple::ppc64le: - case 
llvm::Triple::sparc: - case llvm::Triple::sparcv9: - case llvm::Triple::x86: - case llvm::Triple::x86_64: - useLibgcc = false; - break; - default: - break; - } - } - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { - addOpenMPRuntime(CmdArgs, getToolChain(), Args); - if (D.CCCIsCXX()) { - getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs); - CmdArgs.push_back("-lm"); - } - if (Args.hasArg(options::OPT_pthread)) - CmdArgs.push_back("-lpthread"); - CmdArgs.push_back("-lc"); - - if (useLibgcc) { - if (Args.hasArg(options::OPT_static)) { - // libgcc_eh depends on libc, so resolve as much as possible, - // pull in any new requirements from libc and then get the rest - // of libgcc. - CmdArgs.push_back("-lgcc_eh"); - CmdArgs.push_back("-lc"); - CmdArgs.push_back("-lgcc"); - } else { - CmdArgs.push_back("-lgcc"); - CmdArgs.push_back("--as-needed"); - CmdArgs.push_back("-lgcc_s"); - CmdArgs.push_back("--no-as-needed"); - } - } - } - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { - if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie)) - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("crtendS.o"))); - else - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("crtend.o"))); - CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crtn.o"))); - } - - getToolChain().addProfileRTLibs(Args, CmdArgs); - - const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath()); - C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); -} - -void gnutools::Assembler::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - claimNoWarnArgs(Args); - - const llvm::Triple &Triple = getToolChain().getEffectiveTriple(); - - ArgStringList CmdArgs; - - llvm::Reloc::Model RelocationModel; - unsigned PICLevel; - bool IsPIE; - std::tie(RelocationModel, PICLevel, IsPIE) = - ParsePICArgs(getToolChain(), Triple, Args); - - switch (getToolChain().getArch()) { - default: - break; - // Add --32/--64 to make sure we get the format we want. 
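// ==== [editorial sketch; not from Tools.cpp. The x86 selection that follows
// as one expression: GNUX32 environments get --x32, plain x86_64 gets --64,
// and 32-bit x86 gets --32.]
#include <vector>
static void addX86AsMode(std::vector<const char *> &Argv, bool Is64Bit,
                         bool IsX32ABI) {
  Argv.push_back(!Is64Bit ? "--32" : IsX32ABI ? "--x32" : "--64");
}
// ==== [end sketch]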
- // This is incomplete - case llvm::Triple::x86: - CmdArgs.push_back("--32"); - break; - case llvm::Triple::x86_64: - if (getToolChain().getTriple().getEnvironment() == llvm::Triple::GNUX32) - CmdArgs.push_back("--x32"); - else - CmdArgs.push_back("--64"); - break; - case llvm::Triple::ppc: - CmdArgs.push_back("-a32"); - CmdArgs.push_back("-mppc"); - CmdArgs.push_back("-many"); - break; - case llvm::Triple::ppc64: - CmdArgs.push_back("-a64"); - CmdArgs.push_back("-mppc64"); - CmdArgs.push_back("-many"); - break; - case llvm::Triple::ppc64le: - CmdArgs.push_back("-a64"); - CmdArgs.push_back("-mppc64"); - CmdArgs.push_back("-many"); - CmdArgs.push_back("-mlittle-endian"); - break; - case llvm::Triple::sparc: - case llvm::Triple::sparcel: { - CmdArgs.push_back("-32"); - std::string CPU = getCPUName(Args, getToolChain().getTriple()); - CmdArgs.push_back(getSparcAsmModeForCPU(CPU, getToolChain().getTriple())); - AddAssemblerKPIC(getToolChain(), Args, CmdArgs); - break; - } - case llvm::Triple::sparcv9: { - CmdArgs.push_back("-64"); - std::string CPU = getCPUName(Args, getToolChain().getTriple()); - CmdArgs.push_back(getSparcAsmModeForCPU(CPU, getToolChain().getTriple())); - AddAssemblerKPIC(getToolChain(), Args, CmdArgs); - break; - } - case llvm::Triple::arm: - case llvm::Triple::armeb: - case llvm::Triple::thumb: - case llvm::Triple::thumbeb: { - const llvm::Triple &Triple2 = getToolChain().getTriple(); - switch (Triple2.getSubArch()) { - case llvm::Triple::ARMSubArch_v7: - CmdArgs.push_back("-mfpu=neon"); - break; - case llvm::Triple::ARMSubArch_v8: - CmdArgs.push_back("-mfpu=crypto-neon-fp-armv8"); - break; - default: - break; - } - - switch (arm::getARMFloatABI(getToolChain(), Args)) { - case arm::FloatABI::Invalid: llvm_unreachable("must have an ABI!"); - case arm::FloatABI::Soft: - CmdArgs.push_back(Args.MakeArgString("-mfloat-abi=soft")); - break; - case arm::FloatABI::SoftFP: - CmdArgs.push_back(Args.MakeArgString("-mfloat-abi=softfp")); - break; - case arm::FloatABI::Hard: - CmdArgs.push_back(Args.MakeArgString("-mfloat-abi=hard")); - break; - } - - Args.AddLastArg(CmdArgs, options::OPT_march_EQ); - - // FIXME: remove krait check when GNU tools support krait cpu - // for now replace it with -mcpu=cortex-a15 to avoid a lower - // march from being picked in the absence of a cpu flag. - Arg *A; - if ((A = Args.getLastArg(options::OPT_mcpu_EQ)) && - StringRef(A->getValue()).equals_lower("krait")) - CmdArgs.push_back("-mcpu=cortex-a15"); - else - Args.AddLastArg(CmdArgs, options::OPT_mcpu_EQ); - Args.AddLastArg(CmdArgs, options::OPT_mfpu_EQ); - break; - } - case llvm::Triple::mips: - case llvm::Triple::mipsel: - case llvm::Triple::mips64: - case llvm::Triple::mips64el: { - StringRef CPUName; - StringRef ABIName; - mips::getMipsCPUAndABI(Args, getToolChain().getTriple(), CPUName, ABIName); - ABIName = getGnuCompatibleMipsABIName(ABIName); - - CmdArgs.push_back("-march"); - CmdArgs.push_back(CPUName.data()); - - CmdArgs.push_back("-mabi"); - CmdArgs.push_back(ABIName.data()); - - // -mno-shared should be emitted unless -fpic, -fpie, -fPIC, -fPIE, - // or -mshared (not implemented) is in effect. - if (RelocationModel == llvm::Reloc::Static) - CmdArgs.push_back("-mno-shared"); - - // LLVM doesn't support -mplt yet and acts as if it is always given. - // However, -mplt has no effect with the N64 ABI. - CmdArgs.push_back(ABIName == "64" ? 
"-KPIC" : "-call_nonpic"); - - if (getToolChain().getArch() == llvm::Triple::mips || - getToolChain().getArch() == llvm::Triple::mips64) - CmdArgs.push_back("-EB"); - else - CmdArgs.push_back("-EL"); - - if (Arg *A = Args.getLastArg(options::OPT_mnan_EQ)) { - if (StringRef(A->getValue()) == "2008") - CmdArgs.push_back(Args.MakeArgString("-mnan=2008")); - } - - // Add the last -mfp32/-mfpxx/-mfp64 or -mfpxx if it is enabled by default. - if (Arg *A = Args.getLastArg(options::OPT_mfp32, options::OPT_mfpxx, - options::OPT_mfp64)) { - A->claim(); - A->render(Args, CmdArgs); - } else if (mips::shouldUseFPXX( - Args, getToolChain().getTriple(), CPUName, ABIName, - getMipsFloatABI(getToolChain().getDriver(), Args))) - CmdArgs.push_back("-mfpxx"); - - // Pass on -mmips16 or -mno-mips16. However, the assembler equivalent of - // -mno-mips16 is actually -no-mips16. - if (Arg *A = - Args.getLastArg(options::OPT_mips16, options::OPT_mno_mips16)) { - if (A->getOption().matches(options::OPT_mips16)) { - A->claim(); - A->render(Args, CmdArgs); - } else { - A->claim(); - CmdArgs.push_back("-no-mips16"); - } - } - - Args.AddLastArg(CmdArgs, options::OPT_mmicromips, - options::OPT_mno_micromips); - Args.AddLastArg(CmdArgs, options::OPT_mdsp, options::OPT_mno_dsp); - Args.AddLastArg(CmdArgs, options::OPT_mdspr2, options::OPT_mno_dspr2); - - if (Arg *A = Args.getLastArg(options::OPT_mmsa, options::OPT_mno_msa)) { - // Do not use AddLastArg because not all versions of MIPS assembler - // support -mmsa / -mno-msa options. - if (A->getOption().matches(options::OPT_mmsa)) - CmdArgs.push_back(Args.MakeArgString("-mmsa")); - } - - Args.AddLastArg(CmdArgs, options::OPT_mhard_float, - options::OPT_msoft_float); - - Args.AddLastArg(CmdArgs, options::OPT_mdouble_float, - options::OPT_msingle_float); - - Args.AddLastArg(CmdArgs, options::OPT_modd_spreg, - options::OPT_mno_odd_spreg); - - AddAssemblerKPIC(getToolChain(), Args, CmdArgs); - break; - } - case llvm::Triple::systemz: { - // Always pass an -march option, since our default of z10 is later - // than the GNU assembler's default. - StringRef CPUName = getSystemZTargetCPU(Args); - CmdArgs.push_back(Args.MakeArgString("-march=" + CPUName)); - break; - } - } - - Args.AddAllArgs(CmdArgs, options::OPT_I); - Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); - - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - - for (const auto &II : Inputs) - CmdArgs.push_back(II.getFilename()); - - const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); - C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); - - // Handle the debug info splitting at object creation time if we're - // creating an object. - // TODO: Currently only works on linux with newer objcopy. 
- if (Args.hasArg(options::OPT_gsplit_dwarf) && - getToolChain().getTriple().isOSLinux()) - SplitDebugInfo(getToolChain(), C, *this, JA, Args, Output, - SplitDebugName(Args, Inputs[0])); -} - -static void AddLibgcc(const llvm::Triple &Triple, const Driver &D, - ArgStringList &CmdArgs, const ArgList &Args) { - bool isAndroid = Triple.isAndroid(); - bool isCygMing = Triple.isOSCygMing(); - bool IsIAMCU = Triple.isOSIAMCU(); - bool StaticLibgcc = Args.hasArg(options::OPT_static_libgcc) || - Args.hasArg(options::OPT_static); - if (!D.CCCIsCXX()) - CmdArgs.push_back("-lgcc"); - - if (StaticLibgcc || isAndroid) { - if (D.CCCIsCXX()) - CmdArgs.push_back("-lgcc"); - } else { - if (!D.CCCIsCXX() && !isCygMing) - CmdArgs.push_back("--as-needed"); - CmdArgs.push_back("-lgcc_s"); - if (!D.CCCIsCXX() && !isCygMing) - CmdArgs.push_back("--no-as-needed"); - } - - if (StaticLibgcc && !isAndroid && !IsIAMCU) - CmdArgs.push_back("-lgcc_eh"); - else if (!Args.hasArg(options::OPT_shared) && D.CCCIsCXX()) - CmdArgs.push_back("-lgcc"); - - // According to Android ABI, we have to link with libdl if we are - // linking with non-static libgcc. - // - // NOTE: This fixes a link error on Android MIPS as well. The non-static - // libgcc for MIPS relies on _Unwind_Find_FDE and dl_iterate_phdr from libdl. - if (isAndroid && !StaticLibgcc) - CmdArgs.push_back("-ldl"); -} - -static void AddRunTimeLibs(const ToolChain &TC, const Driver &D, - ArgStringList &CmdArgs, const ArgList &Args) { - // Make use of compiler-rt if --rtlib option is used - ToolChain::RuntimeLibType RLT = TC.GetRuntimeLibType(Args); - - switch (RLT) { - case ToolChain::RLT_CompilerRT: - switch (TC.getTriple().getOS()) { - default: - llvm_unreachable("unsupported OS"); - case llvm::Triple::Win32: - case llvm::Triple::Linux: - case llvm::Triple::Fuchsia: - addClangRT(TC, Args, CmdArgs); - break; - } - break; - case ToolChain::RLT_Libgcc: - // Make sure libgcc is not used under MSVC environment by default - if (TC.getTriple().isKnownWindowsMSVCEnvironment()) { - // Issue error diagnostic if libgcc is explicitly specified - // through command line as --rtlib option argument. 
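// ==== [editorial sketch; not from Tools.cpp. The runtime-library policy
// here in miniature: compiler-rt where supported, libgcc elsewhere, and
// --rtlib=libgcc rejected in an MSVC environment.]
enum class ToyRtLib { CompilerRT, Libgcc };
static bool rtlibAllowed(ToyRtLib R, bool IsMSVCEnvironment) {
  return !(R == ToyRtLib::Libgcc && IsMSVCEnvironment);
}
// ==== [end sketch]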
-      if (Args.hasArg(options::OPT_rtlib_EQ)) {
-        TC.getDriver().Diag(diag::err_drv_unsupported_rtlib_for_platform)
-            << Args.getLastArg(options::OPT_rtlib_EQ)->getValue() << "MSVC";
-      }
-    } else
-      AddLibgcc(TC.getTriple(), D, CmdArgs, Args);
-    break;
-  }
-}
-
-static const char *getLDMOption(const llvm::Triple &T, const ArgList &Args) {
-  switch (T.getArch()) {
-  case llvm::Triple::x86:
-    if (T.isOSIAMCU())
-      return "elf_iamcu";
-    return "elf_i386";
-  case llvm::Triple::aarch64:
-    return "aarch64linux";
-  case llvm::Triple::aarch64_be:
-    return "aarch64_be_linux";
-  case llvm::Triple::arm:
-  case llvm::Triple::thumb:
-    return "armelf_linux_eabi";
-  case llvm::Triple::armeb:
-  case llvm::Triple::thumbeb:
-    return "armelfb_linux_eabi";
-  case llvm::Triple::ppc:
-    return "elf32ppclinux";
-  case llvm::Triple::ppc64:
-    return "elf64ppc";
-  case llvm::Triple::ppc64le:
-    return "elf64lppc";
-  case llvm::Triple::sparc:
-  case llvm::Triple::sparcel:
-    return "elf32_sparc";
-  case llvm::Triple::sparcv9:
-    return "elf64_sparc";
-  case llvm::Triple::mips:
-    return "elf32btsmip";
-  case llvm::Triple::mipsel:
-    return "elf32ltsmip";
-  case llvm::Triple::mips64:
-    if (mips::hasMipsAbiArg(Args, "n32"))
-      return "elf32btsmipn32";
-    return "elf64btsmip";
-  case llvm::Triple::mips64el:
-    if (mips::hasMipsAbiArg(Args, "n32"))
-      return "elf32ltsmipn32";
-    return "elf64ltsmip";
-  case llvm::Triple::systemz:
-    return "elf64_s390";
-  case llvm::Triple::x86_64:
-    if (T.getEnvironment() == llvm::Triple::GNUX32)
-      return "elf32_x86_64";
-    return "elf_x86_64";
-  default:
-    return nullptr;
-  }
-}
-
-void gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
-                                    const InputInfo &Output,
-                                    const InputInfoList &Inputs,
-                                    const ArgList &Args,
-                                    const char *LinkingOutput) const {
-  const toolchains::Linux &ToolChain =
-      static_cast<const toolchains::Linux &>(getToolChain());
-  const Driver &D = ToolChain.getDriver();
-
-  const llvm::Triple &Triple = getToolChain().getEffectiveTriple();
-
-  const llvm::Triple::ArchType Arch = ToolChain.getArch();
-  const bool isAndroid = ToolChain.getTriple().isAndroid();
-  const bool IsIAMCU = ToolChain.getTriple().isOSIAMCU();
-  const bool IsPIE =
-      !Args.hasArg(options::OPT_shared) && !Args.hasArg(options::OPT_static) &&
-      (Args.hasArg(options::OPT_pie) || ToolChain.isPIEDefault());
-  const bool HasCRTBeginEndFiles =
-      ToolChain.getTriple().hasEnvironment() ||
-      (ToolChain.getTriple().getVendor() != llvm::Triple::MipsTechnologies);
-
-  ArgStringList CmdArgs;
-
-  // Silence warning for "clang -g foo.o -o foo"
-  Args.ClaimAllArgs(options::OPT_g_Group);
-  // and "clang -emit-llvm foo.o -o foo"
-  Args.ClaimAllArgs(options::OPT_emit_llvm);
-  // and for "clang -w foo.o -o foo". Other warning options are already
-  // handled somewhere else.
- Args.ClaimAllArgs(options::OPT_w); - - const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath()); - if (llvm::sys::path::filename(Exec) == "lld") { - CmdArgs.push_back("-flavor"); - CmdArgs.push_back("old-gnu"); - CmdArgs.push_back("-target"); - CmdArgs.push_back(Args.MakeArgString(getToolChain().getTripleString())); - } - - if (!D.SysRoot.empty()) - CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot)); - - if (IsPIE) - CmdArgs.push_back("-pie"); - - if (Args.hasArg(options::OPT_rdynamic)) - CmdArgs.push_back("-export-dynamic"); - - if (Args.hasArg(options::OPT_s)) - CmdArgs.push_back("-s"); - - if (Arch == llvm::Triple::armeb || Arch == llvm::Triple::thumbeb) - arm::appendEBLinkFlags(Args, CmdArgs, Triple); - - // Most Android ARM64 targets should enable the linker fix for erratum - // 843419. Only non-Cortex-A53 devices are allowed to skip this flag. - if (Arch == llvm::Triple::aarch64 && isAndroid) { - std::string CPU = getCPUName(Args, Triple); - if (CPU.empty() || CPU == "generic" || CPU == "cortex-a53") - CmdArgs.push_back("--fix-cortex-a53-843419"); - } - - for (const auto &Opt : ToolChain.ExtraOpts) - CmdArgs.push_back(Opt.c_str()); - - if (!Args.hasArg(options::OPT_static)) { - CmdArgs.push_back("--eh-frame-hdr"); - } - - if (const char *LDMOption = getLDMOption(ToolChain.getTriple(), Args)) { - CmdArgs.push_back("-m"); - CmdArgs.push_back(LDMOption); - } else { - D.Diag(diag::err_target_unknown_triple) << Triple.str(); - return; - } - - if (Args.hasArg(options::OPT_static)) { - if (Arch == llvm::Triple::arm || Arch == llvm::Triple::armeb || - Arch == llvm::Triple::thumb || Arch == llvm::Triple::thumbeb) - CmdArgs.push_back("-Bstatic"); - else - CmdArgs.push_back("-static"); - } else if (Args.hasArg(options::OPT_shared)) { - CmdArgs.push_back("-shared"); - } - - if (!Args.hasArg(options::OPT_static)) { - if (Args.hasArg(options::OPT_rdynamic)) - CmdArgs.push_back("-export-dynamic"); - - if (!Args.hasArg(options::OPT_shared)) { - const std::string Loader = - D.DyldPrefix + ToolChain.getDynamicLinker(Args); - CmdArgs.push_back("-dynamic-linker"); - CmdArgs.push_back(Args.MakeArgString(Loader)); - } - } - - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { - if (!isAndroid && !IsIAMCU) { - const char *crt1 = nullptr; - if (!Args.hasArg(options::OPT_shared)) { - if (Args.hasArg(options::OPT_pg)) - crt1 = "gcrt1.o"; - else if (IsPIE) - crt1 = "Scrt1.o"; - else - crt1 = "crt1.o"; - } - if (crt1) - CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crt1))); - - CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crti.o"))); - } - - if (IsIAMCU) - CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crt0.o"))); - else { - const char *crtbegin; - if (Args.hasArg(options::OPT_static)) - crtbegin = isAndroid ? "crtbegin_static.o" : "crtbeginT.o"; - else if (Args.hasArg(options::OPT_shared)) - crtbegin = isAndroid ? "crtbegin_so.o" : "crtbeginS.o"; - else if (IsPIE) - crtbegin = isAndroid ? "crtbegin_dynamic.o" : "crtbeginS.o"; - else - crtbegin = isAndroid ? "crtbegin_dynamic.o" : "crtbegin.o"; - - if (HasCRTBeginEndFiles) - CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtbegin))); - } - - // Add crtfastmath.o if available and fast math is enabled. 
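// ==== [editorial sketch; not from Tools.cpp. What crtfastmath.o does when
// linked in: a static constructor sets the FTZ and DAZ bits in MXCSR so
// denormals are flushed to zero. Roughly, on x86:]
#include <xmmintrin.h>
static void enableFlushToZero() {
  _mm_setcsr(_mm_getcsr() | 0x8040); // 0x8000 = FTZ, 0x0040 = DAZ
}
// ==== [end sketch]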
- ToolChain.AddFastMathRuntimeIfAvailable(Args, CmdArgs); - } - - Args.AddAllArgs(CmdArgs, options::OPT_L); - Args.AddAllArgs(CmdArgs, options::OPT_u); - - ToolChain.AddFilePathLibArgs(Args, CmdArgs); - - if (D.isUsingLTO()) - AddGoldPlugin(ToolChain, Args, CmdArgs, D.getLTOMode() == LTOK_Thin, D); - - if (Args.hasArg(options::OPT_Z_Xlinker__no_demangle)) - CmdArgs.push_back("--no-demangle"); - - bool NeedsSanitizerDeps = addSanitizerRuntimes(ToolChain, Args, CmdArgs); - bool NeedsXRayDeps = addXRayRuntime(ToolChain, Args, CmdArgs); - AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); - // The profile runtime also needs access to system libraries. - getToolChain().addProfileRTLibs(Args, CmdArgs); - - if (D.CCCIsCXX() && - !Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { - bool OnlyLibstdcxxStatic = Args.hasArg(options::OPT_static_libstdcxx) && - !Args.hasArg(options::OPT_static); - if (OnlyLibstdcxxStatic) - CmdArgs.push_back("-Bstatic"); - ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs); - if (OnlyLibstdcxxStatic) - CmdArgs.push_back("-Bdynamic"); - CmdArgs.push_back("-lm"); - } - // Silence warnings when linking C code with a C++ '-stdlib' argument. - Args.ClaimAllArgs(options::OPT_stdlib_EQ); - - if (!Args.hasArg(options::OPT_nostdlib)) { - if (!Args.hasArg(options::OPT_nodefaultlibs)) { - if (Args.hasArg(options::OPT_static)) - CmdArgs.push_back("--start-group"); - - if (NeedsSanitizerDeps) - linkSanitizerRuntimeDeps(ToolChain, CmdArgs); - - if (NeedsXRayDeps) - linkXRayRuntimeDeps(ToolChain, Args, CmdArgs); - - bool WantPthread = Args.hasArg(options::OPT_pthread) || - Args.hasArg(options::OPT_pthreads); - - if (Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, - options::OPT_fno_openmp, false)) { - // OpenMP runtimes implies pthreads when using the GNU toolchain. - // FIXME: Does this really make sense for all GNU toolchains? - WantPthread = true; - - // Also link the particular OpenMP runtimes. - switch (ToolChain.getDriver().getOpenMPRuntime(Args)) { - case Driver::OMPRT_OMP: - CmdArgs.push_back("-lomp"); - break; - case Driver::OMPRT_GOMP: - CmdArgs.push_back("-lgomp"); - - // FIXME: Exclude this for platforms with libgomp that don't require - // librt. Most modern Linux platforms require it, but some may not. - CmdArgs.push_back("-lrt"); - break; - case Driver::OMPRT_IOMP5: - CmdArgs.push_back("-liomp5"); - break; - case Driver::OMPRT_Unknown: - // Already diagnosed. - break; - } - if (JA.isHostOffloading(Action::OFK_OpenMP)) - CmdArgs.push_back("-lomptarget"); - } - - AddRunTimeLibs(ToolChain, D, CmdArgs, Args); - - if (WantPthread && !isAndroid) - CmdArgs.push_back("-lpthread"); - - if (Args.hasArg(options::OPT_fsplit_stack)) - CmdArgs.push_back("--wrap=pthread_create"); - - CmdArgs.push_back("-lc"); - - // Add IAMCU specific libs, if needed. - if (IsIAMCU) - CmdArgs.push_back("-lgloss"); - - if (Args.hasArg(options::OPT_static)) - CmdArgs.push_back("--end-group"); - else - AddRunTimeLibs(ToolChain, D, CmdArgs, Args); - - // Add IAMCU specific libs (outside the group), if needed. - if (IsIAMCU) { - CmdArgs.push_back("--as-needed"); - CmdArgs.push_back("-lsoftfp"); - CmdArgs.push_back("--no-as-needed"); - } - } - - if (!Args.hasArg(options::OPT_nostartfiles) && !IsIAMCU) { - const char *crtend; - if (Args.hasArg(options::OPT_shared)) - crtend = isAndroid ? "crtend_so.o" : "crtendS.o"; - else if (IsPIE) - crtend = isAndroid ? "crtend_android.o" : "crtendS.o"; - else - crtend = isAndroid ? 
"crtend_android.o" : "crtend.o"; - - if (HasCRTBeginEndFiles) - CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtend))); - if (!isAndroid) - CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtn.o"))); - } - } - - // Add OpenMP offloading linker script args if required. - AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA); - - C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); -} - -// NaCl ARM assembly (inline or standalone) can be written with a set of macros -// for the various SFI requirements like register masking. The assembly tool -// inserts the file containing the macros as an input into all the assembly -// jobs. -void nacltools::AssemblerARM::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - const toolchains::NaClToolChain &ToolChain = - static_cast(getToolChain()); - InputInfo NaClMacros(types::TY_PP_Asm, ToolChain.GetNaClArmMacrosPath(), - "nacl-arm-macros.s"); - InputInfoList NewInputs; - NewInputs.push_back(NaClMacros); - NewInputs.append(Inputs.begin(), Inputs.end()); - gnutools::Assembler::ConstructJob(C, JA, Output, NewInputs, Args, - LinkingOutput); -} - -// This is quite similar to gnutools::Linker::ConstructJob with changes that -// we use static by default, do not yet support sanitizers or LTO, and a few -// others. Eventually we can support more of that and hopefully migrate back -// to gnutools::Linker. -void nacltools::Linker::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - - const toolchains::NaClToolChain &ToolChain = - static_cast(getToolChain()); - const Driver &D = ToolChain.getDriver(); - const llvm::Triple::ArchType Arch = ToolChain.getArch(); - const bool IsStatic = - !Args.hasArg(options::OPT_dynamic) && !Args.hasArg(options::OPT_shared); - - ArgStringList CmdArgs; - - // Silence warning for "clang -g foo.o -o foo" - Args.ClaimAllArgs(options::OPT_g_Group); - // and "clang -emit-llvm foo.o -o foo" - Args.ClaimAllArgs(options::OPT_emit_llvm); - // and for "clang -w foo.o -o foo". Other warning options are already - // handled somewhere else. - Args.ClaimAllArgs(options::OPT_w); - - if (!D.SysRoot.empty()) - CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot)); - - if (Args.hasArg(options::OPT_rdynamic)) - CmdArgs.push_back("-export-dynamic"); - - if (Args.hasArg(options::OPT_s)) - CmdArgs.push_back("-s"); - - // NaClToolChain doesn't have ExtraOpts like Linux; the only relevant flag - // from there is --build-id, which we do want. 
- CmdArgs.push_back("--build-id"); - - if (!IsStatic) - CmdArgs.push_back("--eh-frame-hdr"); - - CmdArgs.push_back("-m"); - if (Arch == llvm::Triple::x86) - CmdArgs.push_back("elf_i386_nacl"); - else if (Arch == llvm::Triple::arm) - CmdArgs.push_back("armelf_nacl"); - else if (Arch == llvm::Triple::x86_64) - CmdArgs.push_back("elf_x86_64_nacl"); - else if (Arch == llvm::Triple::mipsel) - CmdArgs.push_back("mipselelf_nacl"); - else - D.Diag(diag::err_target_unsupported_arch) << ToolChain.getArchName() - << "Native Client"; - - if (IsStatic) - CmdArgs.push_back("-static"); - else if (Args.hasArg(options::OPT_shared)) - CmdArgs.push_back("-shared"); - - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { - if (!Args.hasArg(options::OPT_shared)) - CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crt1.o"))); - CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crti.o"))); - - const char *crtbegin; - if (IsStatic) - crtbegin = "crtbeginT.o"; - else if (Args.hasArg(options::OPT_shared)) - crtbegin = "crtbeginS.o"; - else - crtbegin = "crtbegin.o"; - CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtbegin))); - } - - Args.AddAllArgs(CmdArgs, options::OPT_L); - Args.AddAllArgs(CmdArgs, options::OPT_u); - - ToolChain.AddFilePathLibArgs(Args, CmdArgs); - - if (Args.hasArg(options::OPT_Z_Xlinker__no_demangle)) - CmdArgs.push_back("--no-demangle"); - - AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); - - if (D.CCCIsCXX() && - !Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { - bool OnlyLibstdcxxStatic = - Args.hasArg(options::OPT_static_libstdcxx) && !IsStatic; - if (OnlyLibstdcxxStatic) - CmdArgs.push_back("-Bstatic"); - ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs); - if (OnlyLibstdcxxStatic) - CmdArgs.push_back("-Bdynamic"); - CmdArgs.push_back("-lm"); - } - - if (!Args.hasArg(options::OPT_nostdlib)) { - if (!Args.hasArg(options::OPT_nodefaultlibs)) { - // Always use groups, since it has no effect on dynamic libraries. - CmdArgs.push_back("--start-group"); - CmdArgs.push_back("-lc"); - // NaCl's libc++ currently requires libpthread, so just always include it - // in the group for C++. - if (Args.hasArg(options::OPT_pthread) || - Args.hasArg(options::OPT_pthreads) || D.CCCIsCXX()) { - // Gold, used by Mips, handles nested groups differently than ld, and - // without '-lnacl' it prefers symbols from libpthread.a over libnacl.a, - // which is not a desired behaviour here. - // See https://sourceware.org/ml/binutils/2015-03/msg00034.html - if (getToolChain().getArch() == llvm::Triple::mipsel) - CmdArgs.push_back("-lnacl"); - - CmdArgs.push_back("-lpthread"); - } - - CmdArgs.push_back("-lgcc"); - CmdArgs.push_back("--as-needed"); - if (IsStatic) - CmdArgs.push_back("-lgcc_eh"); - else - CmdArgs.push_back("-lgcc_s"); - CmdArgs.push_back("--no-as-needed"); - - // Mips needs to create and use pnacl_legacy library that contains - // definitions from bitcode/pnaclmm.c and definitions for - // __nacl_tp_tls_offset() and __nacl_tp_tdb_offset(). 
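The NaCl ARM assembler above never rewrites the user's assembly; it injects nacl-arm-macros.s as an extra first input and then delegates to the generic GNU assembler job. Stripped of the driver types, the pattern is roughly the following sketch (withNaClArmMacros and MacrosPath are illustrative names, not clang's actual API):

    #include <string>
    #include <vector>

    // Prepend the SFI macro file so it is assembled ahead of every real input.
    std::vector<std::string> withNaClArmMacros(std::vector<std::string> Inputs,
                                               const std::string &MacrosPath) {
      Inputs.insert(Inputs.begin(), MacrosPath); // macros must come first
      return Inputs;
    }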
- if (getToolChain().getArch() == llvm::Triple::mipsel) - CmdArgs.push_back("-lpnacl_legacy"); - - CmdArgs.push_back("--end-group"); - } - - if (!Args.hasArg(options::OPT_nostartfiles)) { - const char *crtend; - if (Args.hasArg(options::OPT_shared)) - crtend = "crtendS.o"; - else - crtend = "crtend.o"; - - CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtend))); - CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtn.o"))); - } - } - - const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath()); - C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); -} - -void fuchsia::Linker::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - const toolchains::Fuchsia &ToolChain = - static_cast(getToolChain()); - const Driver &D = ToolChain.getDriver(); - - ArgStringList CmdArgs; - - // Silence warning for "clang -g foo.o -o foo" - Args.ClaimAllArgs(options::OPT_g_Group); - // and "clang -emit-llvm foo.o -o foo" - Args.ClaimAllArgs(options::OPT_emit_llvm); - // and for "clang -w foo.o -o foo". Other warning options are already - // handled somewhere else. - Args.ClaimAllArgs(options::OPT_w); - - const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath()); - if (llvm::sys::path::stem(Exec).equals_lower("lld")) { - CmdArgs.push_back("-flavor"); - CmdArgs.push_back("gnu"); - } - - if (!D.SysRoot.empty()) - CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot)); - - if (!Args.hasArg(options::OPT_shared) && !Args.hasArg(options::OPT_r)) - CmdArgs.push_back("-pie"); - - if (Args.hasArg(options::OPT_rdynamic)) - CmdArgs.push_back("-export-dynamic"); - - if (Args.hasArg(options::OPT_s)) - CmdArgs.push_back("-s"); - - if (Args.hasArg(options::OPT_r)) - CmdArgs.push_back("-r"); - else - CmdArgs.push_back("--build-id"); - - if (!Args.hasArg(options::OPT_static)) - CmdArgs.push_back("--eh-frame-hdr"); - - if (Args.hasArg(options::OPT_static)) - CmdArgs.push_back("-Bstatic"); - else if (Args.hasArg(options::OPT_shared)) - CmdArgs.push_back("-shared"); - - if (!Args.hasArg(options::OPT_static)) { - if (Args.hasArg(options::OPT_rdynamic)) - CmdArgs.push_back("-export-dynamic"); - - if (!Args.hasArg(options::OPT_shared)) { - CmdArgs.push_back("-dynamic-linker"); - CmdArgs.push_back(Args.MakeArgString(D.DyldPrefix + "ld.so.1")); - } - } - - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { - if (!Args.hasArg(options::OPT_shared)) { - CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("Scrt1.o"))); - } - } - - Args.AddAllArgs(CmdArgs, options::OPT_L); - Args.AddAllArgs(CmdArgs, options::OPT_u); - - ToolChain.AddFilePathLibArgs(Args, CmdArgs); - - AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { - if (Args.hasArg(options::OPT_static)) - CmdArgs.push_back("-Bdynamic"); - - if (D.CCCIsCXX()) { - bool OnlyLibstdcxxStatic = Args.hasArg(options::OPT_static_libstdcxx) && - !Args.hasArg(options::OPT_static); - if (OnlyLibstdcxxStatic) - CmdArgs.push_back("-Bstatic"); - ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs); - if (OnlyLibstdcxxStatic) - CmdArgs.push_back("-Bdynamic"); - CmdArgs.push_back("-lm"); - } - - AddRunTimeLibs(ToolChain, D, CmdArgs, Args); - - if (Args.hasArg(options::OPT_pthread) || - Args.hasArg(options::OPT_pthreads)) - CmdArgs.push_back("-lpthread"); - - if 
(Args.hasArg(options::OPT_fsplit_stack)) - CmdArgs.push_back("--wrap=pthread_create"); - - CmdArgs.push_back("-lc"); - } - - C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); -} - -void minix::Assembler::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - claimNoWarnArgs(Args); - ArgStringList CmdArgs; - - Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); - - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - - for (const auto &II : Inputs) - CmdArgs.push_back(II.getFilename()); - - const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); - C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); -} - -void minix::Linker::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - const Driver &D = getToolChain().getDriver(); - ArgStringList CmdArgs; - - if (Output.isFilename()) { - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - } else { - assert(Output.isNothing() && "Invalid output."); - } - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { - CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crt1.o"))); - CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crti.o"))); - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o"))); - CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crtn.o"))); - } - - Args.AddAllArgs(CmdArgs, - {options::OPT_L, options::OPT_T_Group, options::OPT_e}); - - AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); - - getToolChain().addProfileRTLibs(Args, CmdArgs); - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { - if (D.CCCIsCXX()) { - getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs); - CmdArgs.push_back("-lm"); - } - } - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { - if (Args.hasArg(options::OPT_pthread)) - CmdArgs.push_back("-lpthread"); - CmdArgs.push_back("-lc"); - CmdArgs.push_back("-lCompilerRT-Generic"); - CmdArgs.push_back("-L/usr/pkg/compiler-rt/lib"); - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("crtend.o"))); - } - - const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath()); - C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); -} - -/// DragonFly Tools - -// For now, DragonFly Assemble does just about the same as for -// FreeBSD, but this may change soon. -void dragonfly::Assembler::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - claimNoWarnArgs(Args); - ArgStringList CmdArgs; - - // When building 32-bit code on DragonFly/pc64, we have to explicitly - // instruct as in the base system to assemble 32-bit code. 
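A pattern that recurs at the top of each of these link jobs is "claiming" -g, -emit-llvm, and -w. Claiming does not drop an argument; it only marks it as consumed so the driver's final pass does not emit an unused-command-line-argument diagnostic for it. A toy model of the mechanism (deliberately simplified, not clang's ArgList):

    #include <string>
    #include <vector>

    struct ToyArg {
      std::string Spelling;
      bool Claimed = false; // claimed args are skipped by the unused-arg check
    };

    void claimAll(std::vector<ToyArg> &Args, const std::string &Spelling) {
      for (auto &A : Args)
        if (A.Spelling == Spelling)
          A.Claimed = true; // still visible, just no longer "unused"
    }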
- if (getToolChain().getArch() == llvm::Triple::x86) - CmdArgs.push_back("--32"); - - Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); - - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - - for (const auto &II : Inputs) - CmdArgs.push_back(II.getFilename()); - - const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); - C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); -} - -void dragonfly::Linker::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - const Driver &D = getToolChain().getDriver(); - ArgStringList CmdArgs; - - if (!D.SysRoot.empty()) - CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot)); - - CmdArgs.push_back("--eh-frame-hdr"); - if (Args.hasArg(options::OPT_static)) { - CmdArgs.push_back("-Bstatic"); - } else { - if (Args.hasArg(options::OPT_rdynamic)) - CmdArgs.push_back("-export-dynamic"); - if (Args.hasArg(options::OPT_shared)) - CmdArgs.push_back("-Bshareable"); - else { - CmdArgs.push_back("-dynamic-linker"); - CmdArgs.push_back("/usr/libexec/ld-elf.so.2"); - } - CmdArgs.push_back("--hash-style=gnu"); - CmdArgs.push_back("--enable-new-dtags"); - } - - // When building 32-bit code on DragonFly/pc64, we have to explicitly - // instruct ld in the base system to link 32-bit code. - if (getToolChain().getArch() == llvm::Triple::x86) { - CmdArgs.push_back("-m"); - CmdArgs.push_back("elf_i386"); - } - - if (Output.isFilename()) { - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - } else { - assert(Output.isNothing() && "Invalid output."); - } - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { - if (!Args.hasArg(options::OPT_shared)) { - if (Args.hasArg(options::OPT_pg)) - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("gcrt1.o"))); - else { - if (Args.hasArg(options::OPT_pie)) - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("Scrt1.o"))); - else - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("crt1.o"))); - } - } - CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crti.o"))); - if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie)) - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("crtbeginS.o"))); - else - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o"))); - } - - Args.AddAllArgs(CmdArgs, - {options::OPT_L, options::OPT_T_Group, options::OPT_e}); - - AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { - CmdArgs.push_back("-L/usr/lib/gcc50"); - - if (!Args.hasArg(options::OPT_static)) { - CmdArgs.push_back("-rpath"); - CmdArgs.push_back("/usr/lib/gcc50"); - } - - if (D.CCCIsCXX()) { - getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs); - CmdArgs.push_back("-lm"); - } - - if (Args.hasArg(options::OPT_pthread)) - CmdArgs.push_back("-lpthread"); - - if (!Args.hasArg(options::OPT_nolibc)) { - CmdArgs.push_back("-lc"); - } - - if (Args.hasArg(options::OPT_static) || - Args.hasArg(options::OPT_static_libgcc)) { - CmdArgs.push_back("-lgcc"); - CmdArgs.push_back("-lgcc_eh"); - } else { - if (Args.hasArg(options::OPT_shared_libgcc)) { - CmdArgs.push_back("-lgcc_pic"); - if (!Args.hasArg(options::OPT_shared)) - CmdArgs.push_back("-lgcc"); - } else { - CmdArgs.push_back("-lgcc"); - 
CmdArgs.push_back("--as-needed"); - CmdArgs.push_back("-lgcc_pic"); - CmdArgs.push_back("--no-as-needed"); - } - } - } - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { - if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie)) - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("crtendS.o"))); - else - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("crtend.o"))); - CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crtn.o"))); - } - - getToolChain().addProfileRTLibs(Args, CmdArgs); - - const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath()); - C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); -} - -// Try to find Exe from a Visual Studio distribution. This first tries to find -// an installed copy of Visual Studio and, failing that, looks in the PATH, -// making sure that whatever executable that's found is not a same-named exe -// from clang itself to prevent clang from falling back to itself. -static std::string FindVisualStudioExecutable(const ToolChain &TC, - const char *Exe, - const char *ClangProgramPath) { - const auto &MSVC = static_cast(TC); - std::string visualStudioBinDir; - if (MSVC.getVisualStudioBinariesFolder(ClangProgramPath, - visualStudioBinDir)) { - SmallString<128> FilePath(visualStudioBinDir); - llvm::sys::path::append(FilePath, Exe); - if (llvm::sys::fs::can_execute(FilePath.c_str())) - return FilePath.str(); - } - - return Exe; -} - -void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - ArgStringList CmdArgs; - const ToolChain &TC = getToolChain(); - - assert((Output.isFilename() || Output.isNothing()) && "invalid output"); - if (Output.isFilename()) - CmdArgs.push_back( - Args.MakeArgString(std::string("-out:") + Output.getFilename())); - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles) && - !C.getDriver().IsCLMode()) - CmdArgs.push_back("-defaultlib:libcmt"); - - if (!llvm::sys::Process::GetEnv("LIB")) { - // If the VC environment hasn't been configured (perhaps because the user - // did not run vcvarsall), try to build a consistent link environment. If - // the environment variable is set however, assume the user knows what - // they're doing. 
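FindVisualStudioExecutable above reduces to a probe-then-fall-back lookup: append the tool name to the discovered Visual Studio bin directory, keep that path if it is executable, and otherwise return the bare name so the caller's PATH search can resolve it. A condensed sketch using the same LLVM support APIs (findToolNear is a hypothetical name; VSBinDir is assumed to be already discovered):

    #include "llvm/ADT/SmallString.h"
    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/FileSystem.h"
    #include "llvm/Support/Path.h"
    #include <string>

    std::string findToolNear(llvm::StringRef VSBinDir, llvm::StringRef Exe) {
      llvm::SmallString<128> Candidate(VSBinDir);
      llvm::sys::path::append(Candidate, Exe);
      if (llvm::sys::fs::can_execute(Candidate))
        return std::string(Candidate.str());
      return std::string(Exe); // let the caller resolve it via PATH
    }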
- std::string VisualStudioDir; - const auto &MSVC = static_cast(TC); - if (MSVC.getVisualStudioInstallDir(VisualStudioDir)) { - SmallString<128> LibDir(VisualStudioDir); - llvm::sys::path::append(LibDir, "VC", "lib"); - switch (MSVC.getArch()) { - case llvm::Triple::x86: - // x86 just puts the libraries directly in lib - break; - case llvm::Triple::x86_64: - llvm::sys::path::append(LibDir, "amd64"); - break; - case llvm::Triple::arm: - llvm::sys::path::append(LibDir, "arm"); - break; - default: - break; - } - CmdArgs.push_back( - Args.MakeArgString(std::string("-libpath:") + LibDir.c_str())); - - if (MSVC.useUniversalCRT(VisualStudioDir)) { - std::string UniversalCRTLibPath; - if (MSVC.getUniversalCRTLibraryPath(UniversalCRTLibPath)) - CmdArgs.push_back(Args.MakeArgString(std::string("-libpath:") + - UniversalCRTLibPath)); - } - } - - std::string WindowsSdkLibPath; - if (MSVC.getWindowsSDKLibraryPath(WindowsSdkLibPath)) - CmdArgs.push_back( - Args.MakeArgString(std::string("-libpath:") + WindowsSdkLibPath)); - } - - if (!C.getDriver().IsCLMode() && Args.hasArg(options::OPT_L)) - for (const auto &LibPath : Args.getAllArgValues(options::OPT_L)) - CmdArgs.push_back(Args.MakeArgString("-libpath:" + LibPath)); - - CmdArgs.push_back("-nologo"); - - if (Args.hasArg(options::OPT_g_Group, options::OPT__SLASH_Z7, - options::OPT__SLASH_Zd)) - CmdArgs.push_back("-debug"); - - bool DLL = Args.hasArg(options::OPT__SLASH_LD, options::OPT__SLASH_LDd, - options::OPT_shared); - if (DLL) { - CmdArgs.push_back(Args.MakeArgString("-dll")); - - SmallString<128> ImplibName(Output.getFilename()); - llvm::sys::path::replace_extension(ImplibName, "lib"); - CmdArgs.push_back(Args.MakeArgString(std::string("-implib:") + ImplibName)); - } - - if (TC.getSanitizerArgs().needsAsanRt()) { - CmdArgs.push_back(Args.MakeArgString("-debug")); - CmdArgs.push_back(Args.MakeArgString("-incremental:no")); - if (TC.getSanitizerArgs().needsSharedAsanRt() || - Args.hasArg(options::OPT__SLASH_MD, options::OPT__SLASH_MDd)) { - for (const auto &Lib : {"asan_dynamic", "asan_dynamic_runtime_thunk"}) - CmdArgs.push_back(TC.getCompilerRTArgString(Args, Lib)); - // Make sure the dynamic runtime thunk is not optimized out at link time - // to ensure proper SEH handling. - CmdArgs.push_back(Args.MakeArgString( - TC.getArch() == llvm::Triple::x86 - ? "-include:___asan_seh_interceptor" - : "-include:__asan_seh_interceptor")); - } else if (DLL) { - CmdArgs.push_back(TC.getCompilerRTArgString(Args, "asan_dll_thunk")); - } else { - for (const auto &Lib : {"asan", "asan_cxx"}) - CmdArgs.push_back(TC.getCompilerRTArgString(Args, Lib)); - } - } - - Args.AddAllArgValues(CmdArgs, options::OPT__SLASH_link); - - if (Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, - options::OPT_fno_openmp, false)) { - CmdArgs.push_back("-nodefaultlib:vcomp.lib"); - CmdArgs.push_back("-nodefaultlib:vcompd.lib"); - CmdArgs.push_back(Args.MakeArgString(std::string("-libpath:") + - TC.getDriver().Dir + "/../lib")); - switch (TC.getDriver().getOpenMPRuntime(Args)) { - case Driver::OMPRT_OMP: - CmdArgs.push_back("-defaultlib:libomp.lib"); - break; - case Driver::OMPRT_IOMP5: - CmdArgs.push_back("-defaultlib:libiomp5md.lib"); - break; - case Driver::OMPRT_GOMP: - break; - case Driver::OMPRT_Unknown: - // Already diagnosed. - break; - } - } - - // Add compiler-rt lib in case if it was explicitly - // specified as an argument for --rtlib option. 
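Put together, for a 64-bit debug DLL build with the VC environment unset, the job above ends up emitting an invocation along these lines (illustrative only; the actual library paths depend on the installed Visual Studio and Windows SDK):

    link.exe -out:foo.dll -nologo -debug -dll -implib:foo.lib
        -libpath:C:\VS\VC\lib\amd64 -libpath:C:\SDK\lib\x64 foo.obj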
- if (!Args.hasArg(options::OPT_nostdlib)) { - AddRunTimeLibs(TC, TC.getDriver(), CmdArgs, Args); - } - - // Add filenames, libraries, and other linker inputs. - for (const auto &Input : Inputs) { - if (Input.isFilename()) { - CmdArgs.push_back(Input.getFilename()); - continue; - } - - const Arg &A = Input.getInputArg(); - - // Render -l options differently for the MSVC linker. - if (A.getOption().matches(options::OPT_l)) { - StringRef Lib = A.getValue(); - const char *LinkLibArg; - if (Lib.endswith(".lib")) - LinkLibArg = Args.MakeArgString(Lib); - else - LinkLibArg = Args.MakeArgString(Lib + ".lib"); - CmdArgs.push_back(LinkLibArg); - continue; - } - - // Otherwise, this is some other kind of linker input option like -Wl, -z, - // or -L. Render it, even if MSVC doesn't understand it. - A.renderAsInput(Args, CmdArgs); - } - - TC.addProfileRTLibs(Args, CmdArgs); - - // We need to special case some linker paths. In the case of lld, we need to - // translate 'lld' into 'lld-link', and in the case of the regular msvc - // linker, we need to use a special search algorithm. - llvm::SmallString<128> linkPath; - StringRef Linker = Args.getLastArgValue(options::OPT_fuse_ld_EQ, "link"); - if (Linker.equals_lower("lld")) - Linker = "lld-link"; - - if (Linker.equals_lower("link")) { - // If we're using the MSVC linker, it's not sufficient to just use link - // from the program PATH, because other environments like GnuWin32 install - // their own link.exe which may come first. - linkPath = FindVisualStudioExecutable(TC, "link.exe", - C.getDriver().getClangProgramPath()); - } else { - linkPath = Linker; - llvm::sys::path::replace_extension(linkPath, "exe"); - linkPath = TC.GetProgramPath(linkPath.c_str()); - } - - const char *Exec = Args.MakeArgString(linkPath); - C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); -} - -void visualstudio::Compiler::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - C.addCommand(GetCommand(C, JA, Output, Inputs, Args, LinkingOutput)); -} - -std::unique_ptr visualstudio::Compiler::GetCommand( - Compilation &C, const JobAction &JA, const InputInfo &Output, - const InputInfoList &Inputs, const ArgList &Args, - const char *LinkingOutput) const { - ArgStringList CmdArgs; - CmdArgs.push_back("/nologo"); - CmdArgs.push_back("/c"); // Compile only. - CmdArgs.push_back("/W0"); // No warnings. - - // The goal is to be able to invoke this tool correctly based on - // any flag accepted by clang-cl. - - // These are spelled the same way in clang and cl.exe,. - Args.AddAllArgs(CmdArgs, {options::OPT_D, options::OPT_U, options::OPT_I}); - - // Optimization level. - if (Arg *A = Args.getLastArg(options::OPT_fbuiltin, options::OPT_fno_builtin)) - CmdArgs.push_back(A->getOption().getID() == options::OPT_fbuiltin ? "/Oi" - : "/Oi-"); - if (Arg *A = Args.getLastArg(options::OPT_O, options::OPT_O0)) { - if (A->getOption().getID() == options::OPT_O0) { - CmdArgs.push_back("/Od"); - } else { - CmdArgs.push_back("/Og"); - - StringRef OptLevel = A->getValue(); - if (OptLevel == "s" || OptLevel == "z") - CmdArgs.push_back("/Os"); - else - CmdArgs.push_back("/Ot"); - - CmdArgs.push_back("/Ob2"); - } - } - if (Arg *A = Args.getLastArg(options::OPT_fomit_frame_pointer, - options::OPT_fno_omit_frame_pointer)) - CmdArgs.push_back(A->getOption().getID() == options::OPT_fomit_frame_pointer - ? 
"/Oy" - : "/Oy-"); - if (!Args.hasArg(options::OPT_fwritable_strings)) - CmdArgs.push_back("/GF"); - - // Flags for which clang-cl has an alias. - // FIXME: How can we ensure this stays in sync with relevant clang-cl options? - - if (Args.hasFlag(options::OPT__SLASH_GR_, options::OPT__SLASH_GR, - /*default=*/false)) - CmdArgs.push_back("/GR-"); - - if (Args.hasFlag(options::OPT__SLASH_GS_, options::OPT__SLASH_GS, - /*default=*/false)) - CmdArgs.push_back("/GS-"); - - if (Arg *A = Args.getLastArg(options::OPT_ffunction_sections, - options::OPT_fno_function_sections)) - CmdArgs.push_back(A->getOption().getID() == options::OPT_ffunction_sections - ? "/Gy" - : "/Gy-"); - if (Arg *A = Args.getLastArg(options::OPT_fdata_sections, - options::OPT_fno_data_sections)) - CmdArgs.push_back( - A->getOption().getID() == options::OPT_fdata_sections ? "/Gw" : "/Gw-"); - if (Args.hasArg(options::OPT_fsyntax_only)) - CmdArgs.push_back("/Zs"); - if (Args.hasArg(options::OPT_g_Flag, options::OPT_gline_tables_only, - options::OPT__SLASH_Z7)) - CmdArgs.push_back("/Z7"); - - std::vector Includes = - Args.getAllArgValues(options::OPT_include); - for (const auto &Include : Includes) - CmdArgs.push_back(Args.MakeArgString(std::string("/FI") + Include)); - - // Flags that can simply be passed through. - Args.AddAllArgs(CmdArgs, options::OPT__SLASH_LD); - Args.AddAllArgs(CmdArgs, options::OPT__SLASH_LDd); - Args.AddAllArgs(CmdArgs, options::OPT__SLASH_GX); - Args.AddAllArgs(CmdArgs, options::OPT__SLASH_GX_); - Args.AddAllArgs(CmdArgs, options::OPT__SLASH_EH); - Args.AddAllArgs(CmdArgs, options::OPT__SLASH_Zl); - - // The order of these flags is relevant, so pick the last one. - if (Arg *A = Args.getLastArg(options::OPT__SLASH_MD, options::OPT__SLASH_MDd, - options::OPT__SLASH_MT, options::OPT__SLASH_MTd)) - A->render(Args, CmdArgs); - - // Use MSVC's default threadsafe statics behaviour unless there was a flag. - if (Arg *A = Args.getLastArg(options::OPT_fthreadsafe_statics, - options::OPT_fno_threadsafe_statics)) { - CmdArgs.push_back(A->getOption().getID() == options::OPT_fthreadsafe_statics - ? "/Zc:threadSafeInit" - : "/Zc:threadSafeInit-"); - } - - // Pass through all unknown arguments so that the fallback command can see - // them too. - Args.AddAllArgs(CmdArgs, options::OPT_UNKNOWN); - - // Input filename. - assert(Inputs.size() == 1); - const InputInfo &II = Inputs[0]; - assert(II.getType() == types::TY_C || II.getType() == types::TY_CXX); - CmdArgs.push_back(II.getType() == types::TY_C ? "/Tc" : "/Tp"); - if (II.isFilename()) - CmdArgs.push_back(II.getFilename()); - else - II.getInputArg().renderAsInput(Args, CmdArgs); - - // Output filename. 
- assert(Output.getType() == types::TY_Object); - const char *Fo = - Args.MakeArgString(std::string("/Fo") + Output.getFilename()); - CmdArgs.push_back(Fo); - - const Driver &D = getToolChain().getDriver(); - std::string Exec = FindVisualStudioExecutable(getToolChain(), "cl.exe", - D.getClangProgramPath()); - return llvm::make_unique(JA, *this, Args.MakeArgString(Exec), - CmdArgs, Inputs); -} - -/// MinGW Tools -void MinGW::Assembler::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - claimNoWarnArgs(Args); - ArgStringList CmdArgs; - - if (getToolChain().getArch() == llvm::Triple::x86) { - CmdArgs.push_back("--32"); - } else if (getToolChain().getArch() == llvm::Triple::x86_64) { - CmdArgs.push_back("--64"); - } - - Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); - - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - - for (const auto &II : Inputs) - CmdArgs.push_back(II.getFilename()); - - const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as")); - C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); - - if (Args.hasArg(options::OPT_gsplit_dwarf)) - SplitDebugInfo(getToolChain(), C, *this, JA, Args, Output, - SplitDebugName(Args, Inputs[0])); -} - -void MinGW::Linker::AddLibGCC(const ArgList &Args, - ArgStringList &CmdArgs) const { - if (Args.hasArg(options::OPT_mthreads)) - CmdArgs.push_back("-lmingwthrd"); - CmdArgs.push_back("-lmingw32"); - - // Make use of compiler-rt if --rtlib option is used - ToolChain::RuntimeLibType RLT = getToolChain().GetRuntimeLibType(Args); - if (RLT == ToolChain::RLT_Libgcc) { - bool Static = Args.hasArg(options::OPT_static_libgcc) || - Args.hasArg(options::OPT_static); - bool Shared = Args.hasArg(options::OPT_shared); - bool CXX = getToolChain().getDriver().CCCIsCXX(); - - if (Static || (!CXX && !Shared)) { - CmdArgs.push_back("-lgcc"); - CmdArgs.push_back("-lgcc_eh"); - } else { - CmdArgs.push_back("-lgcc_s"); - CmdArgs.push_back("-lgcc"); - } - } else { - AddRunTimeLibs(getToolChain(), getToolChain().getDriver(), CmdArgs, Args); - } - - CmdArgs.push_back("-lmoldname"); - CmdArgs.push_back("-lmingwex"); - CmdArgs.push_back("-lmsvcrt"); -} - -void MinGW::Linker::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - const ToolChain &TC = getToolChain(); - const Driver &D = TC.getDriver(); - // const SanitizerArgs &Sanitize = TC.getSanitizerArgs(); - - ArgStringList CmdArgs; - - // Silence warning for "clang -g foo.o -o foo" - Args.ClaimAllArgs(options::OPT_g_Group); - // and "clang -emit-llvm foo.o -o foo" - Args.ClaimAllArgs(options::OPT_emit_llvm); - // and for "clang -w foo.o -o foo". Other warning options are already - // handled somewhere else. 
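For reference, the fallback command that GetCommand assembles comes out roughly like this for an optimized C++ input (illustrative; the exact option set mirrors whatever was passed to clang-cl):

    cl.exe /nologo /c /W0 /Og /Ot /Ob2 /GF /Gy /MD /Tp foo.cpp /Fofoo.obj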
- Args.ClaimAllArgs(options::OPT_w); - - StringRef LinkerName = Args.getLastArgValue(options::OPT_fuse_ld_EQ, "ld"); - if (LinkerName.equals_lower("lld")) { - CmdArgs.push_back("-flavor"); - CmdArgs.push_back("gnu"); - } else if (!LinkerName.equals_lower("ld")) { - D.Diag(diag::err_drv_unsupported_linker) << LinkerName; - } - - if (!D.SysRoot.empty()) - CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot)); - - if (Args.hasArg(options::OPT_s)) - CmdArgs.push_back("-s"); - - CmdArgs.push_back("-m"); - if (TC.getArch() == llvm::Triple::x86) - CmdArgs.push_back("i386pe"); - if (TC.getArch() == llvm::Triple::x86_64) - CmdArgs.push_back("i386pep"); - if (TC.getArch() == llvm::Triple::arm) - CmdArgs.push_back("thumb2pe"); - - if (Args.hasArg(options::OPT_mwindows)) { - CmdArgs.push_back("--subsystem"); - CmdArgs.push_back("windows"); - } else if (Args.hasArg(options::OPT_mconsole)) { - CmdArgs.push_back("--subsystem"); - CmdArgs.push_back("console"); - } - - if (Args.hasArg(options::OPT_static)) - CmdArgs.push_back("-Bstatic"); - else { - if (Args.hasArg(options::OPT_mdll)) - CmdArgs.push_back("--dll"); - else if (Args.hasArg(options::OPT_shared)) - CmdArgs.push_back("--shared"); - CmdArgs.push_back("-Bdynamic"); - if (Args.hasArg(options::OPT_mdll) || Args.hasArg(options::OPT_shared)) { - CmdArgs.push_back("-e"); - if (TC.getArch() == llvm::Triple::x86) - CmdArgs.push_back("_DllMainCRTStartup@12"); - else - CmdArgs.push_back("DllMainCRTStartup"); - CmdArgs.push_back("--enable-auto-image-base"); - } - } - - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - - Args.AddAllArgs(CmdArgs, options::OPT_e); - // FIXME: add -N, -n flags - Args.AddLastArg(CmdArgs, options::OPT_r); - Args.AddLastArg(CmdArgs, options::OPT_s); - Args.AddLastArg(CmdArgs, options::OPT_t); - Args.AddAllArgs(CmdArgs, options::OPT_u_Group); - Args.AddLastArg(CmdArgs, options::OPT_Z_Flag); - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { - if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_mdll)) { - CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("dllcrt2.o"))); - } else { - if (Args.hasArg(options::OPT_municode)) - CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crt2u.o"))); - else - CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crt2.o"))); - } - if (Args.hasArg(options::OPT_pg)) - CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("gcrt2.o"))); - CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crtbegin.o"))); - } - - Args.AddAllArgs(CmdArgs, options::OPT_L); - TC.AddFilePathLibArgs(Args, CmdArgs); - AddLinkerInputs(TC, Inputs, Args, CmdArgs, JA); - - // TODO: Add ASan stuff here - - // TODO: Add profile stuff here - - if (D.CCCIsCXX() && - !Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { - bool OnlyLibstdcxxStatic = Args.hasArg(options::OPT_static_libstdcxx) && - !Args.hasArg(options::OPT_static); - if (OnlyLibstdcxxStatic) - CmdArgs.push_back("-Bstatic"); - TC.AddCXXStdlibLibArgs(Args, CmdArgs); - if (OnlyLibstdcxxStatic) - CmdArgs.push_back("-Bdynamic"); - } - - if (!Args.hasArg(options::OPT_nostdlib)) { - if (!Args.hasArg(options::OPT_nodefaultlibs)) { - if (Args.hasArg(options::OPT_static)) - CmdArgs.push_back("--start-group"); - - if (Args.hasArg(options::OPT_fstack_protector) || - Args.hasArg(options::OPT_fstack_protector_strong) || - Args.hasArg(options::OPT_fstack_protector_all)) { - CmdArgs.push_back("-lssp_nonshared"); - CmdArgs.push_back("-lssp"); - } - if (Args.hasArg(options::OPT_fopenmp)) - 
CmdArgs.push_back("-lgomp"); - - AddLibGCC(Args, CmdArgs); - - if (Args.hasArg(options::OPT_pg)) - CmdArgs.push_back("-lgmon"); - - if (Args.hasArg(options::OPT_pthread)) - CmdArgs.push_back("-lpthread"); - - // add system libraries - if (Args.hasArg(options::OPT_mwindows)) { - CmdArgs.push_back("-lgdi32"); - CmdArgs.push_back("-lcomdlg32"); - } - CmdArgs.push_back("-ladvapi32"); - CmdArgs.push_back("-lshell32"); - CmdArgs.push_back("-luser32"); - CmdArgs.push_back("-lkernel32"); - - if (Args.hasArg(options::OPT_static)) - CmdArgs.push_back("--end-group"); - else if (!LinkerName.equals_lower("lld")) - AddLibGCC(Args, CmdArgs); - } - - if (!Args.hasArg(options::OPT_nostartfiles)) { - // Add crtfastmath.o if available and fast math is enabled. - TC.AddFastMathRuntimeIfAvailable(Args, CmdArgs); - - CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crtend.o"))); - } - } - const char *Exec = Args.MakeArgString(TC.GetProgramPath(LinkerName.data())); - C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); -} - -/// XCore Tools -// We pass assemble and link construction to the xcc tool. - -void XCore::Assembler::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - claimNoWarnArgs(Args); - ArgStringList CmdArgs; - - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - - CmdArgs.push_back("-c"); - - if (Args.hasArg(options::OPT_v)) - CmdArgs.push_back("-v"); - - if (Arg *A = Args.getLastArg(options::OPT_g_Group)) - if (!A->getOption().matches(options::OPT_g0)) - CmdArgs.push_back("-g"); - - if (Args.hasFlag(options::OPT_fverbose_asm, options::OPT_fno_verbose_asm, - false)) - CmdArgs.push_back("-fverbose-asm"); - - Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); - - for (const auto &II : Inputs) - CmdArgs.push_back(II.getFilename()); - - const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("xcc")); - C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); -} - -void XCore::Linker::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - ArgStringList CmdArgs; - - if (Output.isFilename()) { - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - } else { - assert(Output.isNothing() && "Invalid output."); - } - - if (Args.hasArg(options::OPT_v)) - CmdArgs.push_back("-v"); - - // Pass -fexceptions through to the linker if it was present. 
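With the default libgcc runtime, the MinGW AddLibGCC helper used above yields, for a static C link, a library sequence of roughly (illustrative):

    -lmingw32 -lgcc -lgcc_eh -lmoldname -lmingwex -lmsvcrt

while a shared or C++ link gets -lgcc_s -lgcc in the middle instead, so the shared unwinder is preferred; -lmingwthrd is prepended only under -mthreads.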
- if (Args.hasFlag(options::OPT_fexceptions, options::OPT_fno_exceptions, - false)) - CmdArgs.push_back("-fexceptions"); - - AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); - - const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("xcc")); - C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); -} - -void CrossWindows::Assembler::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - claimNoWarnArgs(Args); - const auto &TC = - static_cast(getToolChain()); - ArgStringList CmdArgs; - const char *Exec; - - switch (TC.getArch()) { - default: - llvm_unreachable("unsupported architecture"); - case llvm::Triple::arm: - case llvm::Triple::thumb: - break; - case llvm::Triple::x86: - CmdArgs.push_back("--32"); - break; - case llvm::Triple::x86_64: - CmdArgs.push_back("--64"); - break; - } - - Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); - - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - - for (const auto &Input : Inputs) - CmdArgs.push_back(Input.getFilename()); - - const std::string Assembler = TC.GetProgramPath("as"); - Exec = Args.MakeArgString(Assembler); - - C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); -} - -void CrossWindows::Linker::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - const auto &TC = - static_cast(getToolChain()); - const llvm::Triple &T = TC.getTriple(); - const Driver &D = TC.getDriver(); - SmallString<128> EntryPoint; - ArgStringList CmdArgs; - const char *Exec; - - // Silence warning for "clang -g foo.o -o foo" - Args.ClaimAllArgs(options::OPT_g_Group); - // and "clang -emit-llvm foo.o -o foo" - Args.ClaimAllArgs(options::OPT_emit_llvm); - // and for "clang -w foo.o -o foo" - Args.ClaimAllArgs(options::OPT_w); - // Other warning options are already handled somewhere else. - - if (!D.SysRoot.empty()) - CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot)); - - if (Args.hasArg(options::OPT_pie)) - CmdArgs.push_back("-pie"); - if (Args.hasArg(options::OPT_rdynamic)) - CmdArgs.push_back("-export-dynamic"); - if (Args.hasArg(options::OPT_s)) - CmdArgs.push_back("--strip-all"); - - CmdArgs.push_back("-m"); - switch (TC.getArch()) { - default: - llvm_unreachable("unsupported architecture"); - case llvm::Triple::arm: - case llvm::Triple::thumb: - // FIXME: this is incorrect for WinCE - CmdArgs.push_back("thumb2pe"); - break; - case llvm::Triple::x86: - CmdArgs.push_back("i386pe"); - EntryPoint.append("_"); - break; - case llvm::Triple::x86_64: - CmdArgs.push_back("i386pep"); - break; - } - - if (Args.hasArg(options::OPT_shared)) { - switch (T.getArch()) { - default: - llvm_unreachable("unsupported architecture"); - case llvm::Triple::arm: - case llvm::Triple::thumb: - case llvm::Triple::x86_64: - EntryPoint.append("_DllMainCRTStartup"); - break; - case llvm::Triple::x86: - EntryPoint.append("_DllMainCRTStartup@12"); - break; - } - - CmdArgs.push_back("-shared"); - CmdArgs.push_back("-Bdynamic"); - - CmdArgs.push_back("--enable-auto-image-base"); - - CmdArgs.push_back("--entry"); - CmdArgs.push_back(Args.MakeArgString(EntryPoint)); - } else { - EntryPoint.append("mainCRTStartup"); - - CmdArgs.push_back(Args.hasArg(options::OPT_static) ? 
"-Bstatic" - : "-Bdynamic"); - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { - CmdArgs.push_back("--entry"); - CmdArgs.push_back(Args.MakeArgString(EntryPoint)); - } - - // FIXME: handle subsystem - } - - // NOTE: deal with multiple definitions on Windows (e.g. COMDAT) - CmdArgs.push_back("--allow-multiple-definition"); - - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - - if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_rdynamic)) { - SmallString<261> ImpLib(Output.getFilename()); - llvm::sys::path::replace_extension(ImpLib, ".lib"); - - CmdArgs.push_back("--out-implib"); - CmdArgs.push_back(Args.MakeArgString(ImpLib)); - } - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { - const std::string CRTPath(D.SysRoot + "/usr/lib/"); - const char *CRTBegin; - - CRTBegin = - Args.hasArg(options::OPT_shared) ? "crtbeginS.obj" : "crtbegin.obj"; - CmdArgs.push_back(Args.MakeArgString(CRTPath + CRTBegin)); - } - - Args.AddAllArgs(CmdArgs, options::OPT_L); - TC.AddFilePathLibArgs(Args, CmdArgs); - AddLinkerInputs(TC, Inputs, Args, CmdArgs, JA); - - if (D.CCCIsCXX() && !Args.hasArg(options::OPT_nostdlib) && - !Args.hasArg(options::OPT_nodefaultlibs)) { - bool StaticCXX = Args.hasArg(options::OPT_static_libstdcxx) && - !Args.hasArg(options::OPT_static); - if (StaticCXX) - CmdArgs.push_back("-Bstatic"); - TC.AddCXXStdlibLibArgs(Args, CmdArgs); - if (StaticCXX) - CmdArgs.push_back("-Bdynamic"); - } - - if (!Args.hasArg(options::OPT_nostdlib)) { - if (!Args.hasArg(options::OPT_nodefaultlibs)) { - // TODO handle /MT[d] /MD[d] - CmdArgs.push_back("-lmsvcrt"); - AddRunTimeLibs(TC, D, CmdArgs, Args); - } - } - - if (TC.getSanitizerArgs().needsAsanRt()) { - // TODO handle /MT[d] /MD[d] - if (Args.hasArg(options::OPT_shared)) { - CmdArgs.push_back(TC.getCompilerRTArgString(Args, "asan_dll_thunk")); - } else { - for (const auto &Lib : {"asan_dynamic", "asan_dynamic_runtime_thunk"}) - CmdArgs.push_back(TC.getCompilerRTArgString(Args, Lib)); - // Make sure the dynamic runtime thunk is not optimized out at link time - // to ensure proper SEH handling. - CmdArgs.push_back(Args.MakeArgString("--undefined")); - CmdArgs.push_back(Args.MakeArgString(TC.getArch() == llvm::Triple::x86 - ? "___asan_seh_interceptor" - : "__asan_seh_interceptor")); - } - } - - Exec = Args.MakeArgString(TC.GetLinkerPath()); - - C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); -} - -void tools::SHAVE::Compiler::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - ArgStringList CmdArgs; - assert(Inputs.size() == 1); - const InputInfo &II = Inputs[0]; - assert(II.getType() == types::TY_C || II.getType() == types::TY_CXX || - II.getType() == types::TY_PP_CXX); - - if (JA.getKind() == Action::PreprocessJobClass) { - Args.ClaimAllArgs(); - CmdArgs.push_back("-E"); - } else { - assert(Output.getType() == types::TY_PP_Asm); // Require preprocessed asm. - CmdArgs.push_back("-S"); - CmdArgs.push_back("-fno-exceptions"); // Always do this even if unspecified. - } - CmdArgs.push_back("-DMYRIAD2"); - - // Append all -I, -iquote, -isystem paths, defines/undefines, - // 'f' flags, optimize flags, and warning options. - // These are spelled the same way in clang and moviCompile. 
- Args.AddAllArgsExcept( - CmdArgs, - {options::OPT_I_Group, options::OPT_clang_i_Group, options::OPT_std_EQ, - options::OPT_D, options::OPT_U, options::OPT_f_Group, - options::OPT_f_clang_Group, options::OPT_g_Group, options::OPT_M_Group, - options::OPT_O_Group, options::OPT_W_Group, options::OPT_mcpu_EQ}, - {options::OPT_fno_split_dwarf_inlining}); - Args.hasArg(options::OPT_fno_split_dwarf_inlining); // Claim it if present. - - // If we're producing a dependency file, and assembly is the final action, - // then the name of the target in the dependency file should be the '.o' - // file, not the '.s' file produced by this step. For example, instead of - // /tmp/mumble.s: mumble.c .../someheader.h - // the filename on the lefthand side should be "mumble.o" - if (Args.getLastArg(options::OPT_MF) && !Args.getLastArg(options::OPT_MT) && - C.getActions().size() == 1 && - C.getActions()[0]->getKind() == Action::AssembleJobClass) { - Arg *A = Args.getLastArg(options::OPT_o); - if (A) { - CmdArgs.push_back("-MT"); - CmdArgs.push_back(Args.MakeArgString(A->getValue())); - } - } - - CmdArgs.push_back(II.getFilename()); - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - - std::string Exec = - Args.MakeArgString(getToolChain().GetProgramPath("moviCompile")); - C.addCommand(llvm::make_unique(JA, *this, Args.MakeArgString(Exec), - CmdArgs, Inputs)); -} - -void tools::SHAVE::Assembler::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - ArgStringList CmdArgs; - - assert(Inputs.size() == 1); - const InputInfo &II = Inputs[0]; - assert(II.getType() == types::TY_PP_Asm); // Require preprocessed asm input. - assert(Output.getType() == types::TY_Object); - - CmdArgs.push_back("-no6thSlotCompression"); - const Arg *CPUArg = Args.getLastArg(options::OPT_mcpu_EQ); - if (CPUArg) - CmdArgs.push_back( - Args.MakeArgString("-cv:" + StringRef(CPUArg->getValue()))); - CmdArgs.push_back("-noSPrefixing"); - CmdArgs.push_back("-a"); // Mystery option. - Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); - for (const Arg *A : Args.filtered(options::OPT_I, options::OPT_isystem)) { - A->claim(); - CmdArgs.push_back( - Args.MakeArgString(std::string("-i:") + A->getValue(0))); - } - CmdArgs.push_back("-elf"); // Output format. - CmdArgs.push_back(II.getFilename()); - CmdArgs.push_back( - Args.MakeArgString(std::string("-o:") + Output.getFilename())); - - std::string Exec = - Args.MakeArgString(getToolChain().GetProgramPath("moviAsm")); - C.addCommand(llvm::make_unique(JA, *this, Args.MakeArgString(Exec), - CmdArgs, Inputs)); -} - -void tools::Myriad::Linker::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - const auto &TC = - static_cast(getToolChain()); - const llvm::Triple &T = TC.getTriple(); - ArgStringList CmdArgs; - bool UseStartfiles = - !Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles); - bool UseDefaultLibs = - !Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs); - // Silence warning if the args contain both -nostdlib and -stdlib=. - Args.getLastArg(options::OPT_stdlib_EQ); - - if (T.getArch() == llvm::Triple::sparc) - CmdArgs.push_back("-EB"); - else // SHAVE assumes little-endian, and sparcel is expressly so. 
- CmdArgs.push_back("-EL"); - - // The remaining logic is mostly like gnutools::Linker::ConstructJob, - // but we never pass through a --sysroot option and various other bits. - // For example, there are no sanitizers (yet) nor gold linker. - - // Eat some arguments that may be present but have no effect. - Args.ClaimAllArgs(options::OPT_g_Group); - Args.ClaimAllArgs(options::OPT_w); - Args.ClaimAllArgs(options::OPT_static_libgcc); - - if (Args.hasArg(options::OPT_s)) // Pass the 'strip' option. - CmdArgs.push_back("-s"); - - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - - if (UseStartfiles) { - // If you want startfiles, it means you want the builtin crti and crtbegin, - // but not crt0. Myriad link commands provide their own crt0.o as needed. - CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crti.o"))); - CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crtbegin.o"))); - } - - Args.AddAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group, - options::OPT_e, options::OPT_s, options::OPT_t, - options::OPT_Z_Flag, options::OPT_r}); - - TC.AddFilePathLibArgs(Args, CmdArgs); - - bool NeedsSanitizerDeps = addSanitizerRuntimes(TC, Args, CmdArgs); - AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); - - if (UseDefaultLibs) { - if (NeedsSanitizerDeps) - linkSanitizerRuntimeDeps(TC, CmdArgs); - if (C.getDriver().CCCIsCXX()) { - if (TC.GetCXXStdlibType(Args) == ToolChain::CST_Libcxx) { - CmdArgs.push_back("-lc++"); - CmdArgs.push_back("-lc++abi"); - } else - CmdArgs.push_back("-lstdc++"); - } - if (T.getOS() == llvm::Triple::RTEMS) { - CmdArgs.push_back("--start-group"); - CmdArgs.push_back("-lc"); - CmdArgs.push_back("-lgcc"); // circularly dependent on rtems - // You must provide your own "-L" option to enable finding these. 
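The --start-group/--end-group bracketing used for RTEMS below tells the linker to rescan the enclosed archives until no further undefined symbols can be resolved, which is what breaks the -lc/-lgcc circular dependency. Written out as a bare link line, the effect is roughly (illustrative):

    sparc-myriad-elf-ld ... --start-group -lc -lgcc -lrtemscpu -lrtemsbsp --end-group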
- CmdArgs.push_back("-lrtemscpu"); - CmdArgs.push_back("-lrtemsbsp"); - CmdArgs.push_back("--end-group"); - } else { - CmdArgs.push_back("-lc"); - CmdArgs.push_back("-lgcc"); - } - } - if (UseStartfiles) { - CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crtend.o"))); - CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crtn.o"))); - } - - std::string Exec = - Args.MakeArgString(TC.GetProgramPath("sparc-myriad-elf-ld")); - C.addCommand(llvm::make_unique(JA, *this, Args.MakeArgString(Exec), - CmdArgs, Inputs)); -} - -void PS4cpu::Assemble::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - claimNoWarnArgs(Args); - ArgStringList CmdArgs; - - Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); - - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - - assert(Inputs.size() == 1 && "Unexpected number of inputs."); - const InputInfo &Input = Inputs[0]; - assert(Input.isFilename() && "Invalid input."); - CmdArgs.push_back(Input.getFilename()); - - const char *Exec = - Args.MakeArgString(getToolChain().GetProgramPath("orbis-as")); - C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); -} - -static void AddPS4SanitizerArgs(const ToolChain &TC, ArgStringList &CmdArgs) { - const SanitizerArgs &SanArgs = TC.getSanitizerArgs(); - if (SanArgs.needsUbsanRt()) { - CmdArgs.push_back("-lSceDbgUBSanitizer_stub_weak"); - } - if (SanArgs.needsAsanRt()) { - CmdArgs.push_back("-lSceDbgAddressSanitizer_stub_weak"); - } -} - -static void ConstructPS4LinkJob(const Tool &T, Compilation &C, - const JobAction &JA, const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) { - const toolchains::FreeBSD &ToolChain = - static_cast(T.getToolChain()); - const Driver &D = ToolChain.getDriver(); - ArgStringList CmdArgs; - - // Silence warning for "clang -g foo.o -o foo" - Args.ClaimAllArgs(options::OPT_g_Group); - // and "clang -emit-llvm foo.o -o foo" - Args.ClaimAllArgs(options::OPT_emit_llvm); - // and for "clang -w foo.o -o foo". Other warning options are already - // handled somewhere else. 
- Args.ClaimAllArgs(options::OPT_w); - - if (!D.SysRoot.empty()) - CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot)); - - if (Args.hasArg(options::OPT_pie)) - CmdArgs.push_back("-pie"); - - if (Args.hasArg(options::OPT_rdynamic)) - CmdArgs.push_back("-export-dynamic"); - if (Args.hasArg(options::OPT_shared)) - CmdArgs.push_back("--oformat=so"); - - if (Output.isFilename()) { - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - } else { - assert(Output.isNothing() && "Invalid output."); - } - - AddPS4SanitizerArgs(ToolChain, CmdArgs); - - Args.AddAllArgs(CmdArgs, options::OPT_L); - Args.AddAllArgs(CmdArgs, options::OPT_T_Group); - Args.AddAllArgs(CmdArgs, options::OPT_e); - Args.AddAllArgs(CmdArgs, options::OPT_s); - Args.AddAllArgs(CmdArgs, options::OPT_t); - Args.AddAllArgs(CmdArgs, options::OPT_r); - - if (Args.hasArg(options::OPT_Z_Xlinker__no_demangle)) - CmdArgs.push_back("--no-demangle"); - - AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); - - if (Args.hasArg(options::OPT_pthread)) { - CmdArgs.push_back("-lpthread"); - } - - const char *Exec = Args.MakeArgString(ToolChain.GetProgramPath("orbis-ld")); - - C.addCommand(llvm::make_unique(JA, T, Exec, CmdArgs, Inputs)); -} - -static void ConstructGoldLinkJob(const Tool &T, Compilation &C, - const JobAction &JA, const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) { - const toolchains::FreeBSD &ToolChain = - static_cast(T.getToolChain()); - const Driver &D = ToolChain.getDriver(); - ArgStringList CmdArgs; - - // Silence warning for "clang -g foo.o -o foo" - Args.ClaimAllArgs(options::OPT_g_Group); - // and "clang -emit-llvm foo.o -o foo" - Args.ClaimAllArgs(options::OPT_emit_llvm); - // and for "clang -w foo.o -o foo". Other warning options are already - // handled somewhere else. 
- Args.ClaimAllArgs(options::OPT_w); - - if (!D.SysRoot.empty()) - CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot)); - - if (Args.hasArg(options::OPT_pie)) - CmdArgs.push_back("-pie"); - - if (Args.hasArg(options::OPT_static)) { - CmdArgs.push_back("-Bstatic"); - } else { - if (Args.hasArg(options::OPT_rdynamic)) - CmdArgs.push_back("-export-dynamic"); - CmdArgs.push_back("--eh-frame-hdr"); - if (Args.hasArg(options::OPT_shared)) { - CmdArgs.push_back("-Bshareable"); - } else { - CmdArgs.push_back("-dynamic-linker"); - CmdArgs.push_back("/libexec/ld-elf.so.1"); - } - CmdArgs.push_back("--enable-new-dtags"); - } - - if (Output.isFilename()) { - CmdArgs.push_back("-o"); - CmdArgs.push_back(Output.getFilename()); - } else { - assert(Output.isNothing() && "Invalid output."); - } - - AddPS4SanitizerArgs(ToolChain, CmdArgs); - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { - const char *crt1 = nullptr; - if (!Args.hasArg(options::OPT_shared)) { - if (Args.hasArg(options::OPT_pg)) - crt1 = "gcrt1.o"; - else if (Args.hasArg(options::OPT_pie)) - crt1 = "Scrt1.o"; - else - crt1 = "crt1.o"; - } - if (crt1) - CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crt1))); - - CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crti.o"))); - - const char *crtbegin = nullptr; - if (Args.hasArg(options::OPT_static)) - crtbegin = "crtbeginT.o"; - else if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie)) - crtbegin = "crtbeginS.o"; - else - crtbegin = "crtbegin.o"; - - CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtbegin))); - } - - Args.AddAllArgs(CmdArgs, options::OPT_L); - ToolChain.AddFilePathLibArgs(Args, CmdArgs); - Args.AddAllArgs(CmdArgs, options::OPT_T_Group); - Args.AddAllArgs(CmdArgs, options::OPT_e); - Args.AddAllArgs(CmdArgs, options::OPT_s); - Args.AddAllArgs(CmdArgs, options::OPT_t); - Args.AddAllArgs(CmdArgs, options::OPT_r); - - if (Args.hasArg(options::OPT_Z_Xlinker__no_demangle)) - CmdArgs.push_back("--no-demangle"); - - AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { - // For PS4, we always want to pass libm, libstdc++ and libkernel - // libraries for both C and C++ compilations. - CmdArgs.push_back("-lkernel"); - if (D.CCCIsCXX()) { - ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs); - if (Args.hasArg(options::OPT_pg)) - CmdArgs.push_back("-lm_p"); - else - CmdArgs.push_back("-lm"); - } - // FIXME: For some reason GCC passes -lgcc and -lgcc_s before adding - // the default system libraries. Just mimic this for now. 
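One detail worth noting in the gold-linker job that follows: -lstdc++ is bracketed with --as-needed/--no-as-needed, so a DT_NEEDED entry for the C++ runtime is only recorded when some input actually references it, i.e. roughly:

    orbis-ld ... --as-needed -lstdc++ --no-as-needed ...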
-    if (Args.hasArg(options::OPT_pg))
-      CmdArgs.push_back("-lgcc_p");
-    else
-      CmdArgs.push_back("-lcompiler_rt");
-    if (Args.hasArg(options::OPT_static)) {
-      CmdArgs.push_back("-lstdc++");
-    } else if (Args.hasArg(options::OPT_pg)) {
-      CmdArgs.push_back("-lgcc_eh_p");
-    } else {
-      CmdArgs.push_back("--as-needed");
-      CmdArgs.push_back("-lstdc++");
-      CmdArgs.push_back("--no-as-needed");
-    }
-
-    if (Args.hasArg(options::OPT_pthread)) {
-      if (Args.hasArg(options::OPT_pg))
-        CmdArgs.push_back("-lpthread_p");
-      else
-        CmdArgs.push_back("-lpthread");
-    }
-
-    if (Args.hasArg(options::OPT_pg)) {
-      if (Args.hasArg(options::OPT_shared))
-        CmdArgs.push_back("-lc");
-      else {
-        if (Args.hasArg(options::OPT_static)) {
-          CmdArgs.push_back("--start-group");
-          CmdArgs.push_back("-lc_p");
-          CmdArgs.push_back("-lpthread_p");
-          CmdArgs.push_back("--end-group");
-        } else {
-          CmdArgs.push_back("-lc_p");
-        }
-      }
-      CmdArgs.push_back("-lgcc_p");
-    } else {
-      if (Args.hasArg(options::OPT_static)) {
-        CmdArgs.push_back("--start-group");
-        CmdArgs.push_back("-lc");
-        CmdArgs.push_back("-lpthread");
-        CmdArgs.push_back("--end-group");
-      } else {
-        CmdArgs.push_back("-lc");
-      }
-      CmdArgs.push_back("-lcompiler_rt");
-    }
-
-    if (Args.hasArg(options::OPT_static)) {
-      CmdArgs.push_back("-lstdc++");
-    } else if (Args.hasArg(options::OPT_pg)) {
-      CmdArgs.push_back("-lgcc_eh_p");
-    } else {
-      CmdArgs.push_back("--as-needed");
-      CmdArgs.push_back("-lstdc++");
-      CmdArgs.push_back("--no-as-needed");
-    }
-  }
-
-  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
-    if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie))
-      CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtendS.o")));
-    else
-      CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtend.o")));
-    CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtn.o")));
-  }
-
-  const char *Exec =
-#ifdef LLVM_ON_WIN32
-      Args.MakeArgString(ToolChain.GetProgramPath("orbis-ld.gold"));
-#else
-      Args.MakeArgString(ToolChain.GetProgramPath("orbis-ld"));
-#endif
-
-  C.addCommand(llvm::make_unique<Command>(JA, T, Exec, CmdArgs, Inputs));
-}
-
-void PS4cpu::Link::ConstructJob(Compilation &C, const JobAction &JA,
-                                const InputInfo &Output,
-                                const InputInfoList &Inputs,
-                                const ArgList &Args,
-                                const char *LinkingOutput) const {
-  const toolchains::FreeBSD &ToolChain =
-      static_cast<const toolchains::FreeBSD &>(getToolChain());
-  const Driver &D = ToolChain.getDriver();
-  bool PS4Linker;
-  StringRef LinkerOptName;
-  if (const Arg *A = Args.getLastArg(options::OPT_fuse_ld_EQ)) {
-    LinkerOptName = A->getValue();
-    if (LinkerOptName != "ps4" && LinkerOptName != "gold")
-      D.Diag(diag::err_drv_unsupported_linker) << LinkerOptName;
-  }
-
-  if (LinkerOptName == "gold")
-    PS4Linker = false;
-  else if (LinkerOptName == "ps4")
-    PS4Linker = true;
-  else
-    PS4Linker = !Args.hasArg(options::OPT_shared);
-
-  if (PS4Linker)
-    ConstructPS4LinkJob(*this, C, JA, Output, Inputs, Args, LinkingOutput);
-  else
-    ConstructGoldLinkJob(*this, C, JA, Output, Inputs, Args, LinkingOutput);
-}
-
-void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
-                                    const InputInfo &Output,
-                                    const InputInfoList &Inputs,
-                                    const ArgList &Args,
-                                    const char *LinkingOutput) const {
-  const auto &TC =
-      static_cast<const toolchains::CudaToolChain &>(getToolChain());
-  assert(TC.getTriple().isNVPTX() && "Wrong platform");
-
-  // Obtain architecture from the action.
-  CudaArch gpu_arch = StringToCudaArch(JA.getOffloadingArch());
-  assert(gpu_arch != CudaArch::UNKNOWN &&
-         "Device action expected to have an architecture.");
-
-  // Check that our installation's ptxas supports gpu_arch.
-  if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
-    TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
-  }
-
-  ArgStringList CmdArgs;
-  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
-  if (Args.hasFlag(options::OPT_cuda_noopt_device_debug,
-                   options::OPT_no_cuda_noopt_device_debug, false)) {
-    // ptxas does not accept -g option if optimization is enabled, so
-    // we ignore the compiler's -O* options if we want debug info.
-    CmdArgs.push_back("-g");
-    CmdArgs.push_back("--dont-merge-basicblocks");
-    CmdArgs.push_back("--return-at-end");
-  } else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
-    // Map the -O we received to -O{0,1,2,3}.
-    //
-    // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
-    // default, so it may correspond more closely to the spirit of clang -O2.
-
-    // -O3 seems like the least-bad option when -Osomething is specified to
-    // clang but it isn't handled below.
-    StringRef OOpt = "3";
-    if (A->getOption().matches(options::OPT_O4) ||
-        A->getOption().matches(options::OPT_Ofast))
-      OOpt = "3";
-    else if (A->getOption().matches(options::OPT_O0))
-      OOpt = "0";
-    else if (A->getOption().matches(options::OPT_O)) {
-      // -Os, -Oz, and -O(anything else) map to -O2, for lack of better options.
-      OOpt = llvm::StringSwitch<const char *>(A->getValue())
-                 .Case("1", "1")
-                 .Case("2", "2")
-                 .Case("3", "3")
-                 .Case("s", "2")
-                 .Case("z", "2")
-                 .Default("2");
-    }
-    CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
-  } else {
-    // If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
-    // to no optimizations, but ptxas's default is -O3.
-    CmdArgs.push_back("-O0");
-  }
-
-  CmdArgs.push_back("--gpu-name");
-  CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
-  CmdArgs.push_back("--output-file");
-  CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
-  for (const auto& II : Inputs)
-    CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
-
-  for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
-    CmdArgs.push_back(Args.MakeArgString(A));
-
-  const char *Exec = Args.MakeArgString(TC.GetProgramPath("ptxas"));
-  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
-}
-
-// All inputs to this linker must be from CudaDeviceActions, as we need to look
-// at the Inputs' Actions in order to figure out which GPU architecture they
-// correspond to.
-void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
-                                 const InputInfo &Output,
-                                 const InputInfoList &Inputs,
-                                 const ArgList &Args,
-                                 const char *LinkingOutput) const {
-  const auto &TC =
-      static_cast<const toolchains::CudaToolChain &>(getToolChain());
-  assert(TC.getTriple().isNVPTX() && "Wrong platform");
-
-  ArgStringList CmdArgs;
-  CmdArgs.push_back("--cuda");
-  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
-  CmdArgs.push_back(Args.MakeArgString("--create"));
-  CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
-
-  for (const auto& II : Inputs) {
-    auto *A = II.getAction();
-    assert(A->getInputs().size() == 1 &&
-           "Device offload action is expected to have a single input");
-    const char *gpu_arch_str = A->getOffloadingArch();
-    assert(gpu_arch_str &&
-           "Device action expected to have associated a GPU architecture!");
-    CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);
-
-    // We need to pass an Arch of the form "sm_XX" for cubin files and
-    // "compute_XX" for ptx.
-    const char *Arch =
-        (II.getType() == types::TY_PP_Asm)
-            ? CudaVirtualArchToString(VirtualArchForCudaArch(gpu_arch))
-            : gpu_arch_str;
-    CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") +
-                                         Arch + ",file=" + II.getFilename()));
-  }
-
-  for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
-    CmdArgs.push_back(Args.MakeArgString(A));
-
-  const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
-  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
-}
diff --git a/tools/clang/lib/Format/FormatToken.cpp b/tools/clang/lib/Format/FormatToken.cpp
index 976d746..152f34e 100644
--- a/tools/clang/lib/Format/FormatToken.cpp
+++ b/tools/clang/lib/Format/FormatToken.cpp
@@ -78,6 +78,9 @@ unsigned CommaSeparatedList::formatAfterToken(LineState &State,
   if (State.NextToken == nullptr || !State.NextToken->Previous)
     return 0;
 
+  if (Formats.size() == 1)
+    return 0; // Handled by formatFromToken
+
   // Ensure that we start on the opening brace.
   const FormatToken *LBrace =
       State.NextToken->Previous->getPreviousNonComment();
@@ -93,6 +96,7 @@ unsigned CommaSeparatedList::formatAfterToken(LineState &State,
 
   // Find the best ColumnFormat, i.e. the best number of columns to use.
   const ColumnFormat *Format = getColumnFormat(RemainingCodePoints);
+
   // If no ColumnFormat can be used, the braced list would generally be
   // bin-packed. Add a severe penalty to this so that column layouts are
   // preferred if possible.
@@ -130,7 +134,9 @@ unsigned CommaSeparatedList::formatAfterToken(LineState &State,
 unsigned CommaSeparatedList::formatFromToken(LineState &State,
                                              ContinuationIndenter *Indenter,
                                              bool DryRun) {
-  if (HasNestedBracedList)
+  // Formatting with 1 Column isn't really a column layout, so we don't need the
+  // special logic here. We can just avoid bin packing any of the parameters.
+  if (Formats.size() == 1 || HasNestedBracedList)
     State.Stack.back().AvoidBinPacking = true;
   return 0;
 }
@@ -274,7 +280,7 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {
       continue;
 
     // Ignore layouts that are bound to violate the column limit.
-    if (Format.TotalWidth > Style.ColumnLimit)
+    if (Format.TotalWidth > Style.ColumnLimit && Columns > 1)
       continue;
 
     Formats.push_back(Format);
@@ -288,7 +294,7 @@ CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const {
        I = Formats.rbegin(),
        E = Formats.rend();
        I != E; ++I) {
-    if (I->TotalWidth <= RemainingCharacters) {
+    if (I->TotalWidth <= RemainingCharacters || I->Columns == 1) {
       if (BestFormat && I->LineCount > BestFormat->LineCount)
         break;
       BestFormat = &*I;
diff --git a/tools/clang/lib/Frontend/InitPreprocessor.cpp b/tools/clang/lib/Frontend/InitPreprocessor.cpp
index f09e6a8..d3644bd 100644
--- a/tools/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/tools/clang/lib/Frontend/InitPreprocessor.cpp
@@ -475,6 +475,7 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts,
     Builder.defineMacro("__cpp_user_defined_literals", "200809");
     Builder.defineMacro("__cpp_lambdas", "200907");
     Builder.defineMacro("__cpp_constexpr",
+                        LangOpts.CPlusPlus1z ? "201603" :
                         LangOpts.CPlusPlus14 ? "201304" : "200704");
     Builder.defineMacro("__cpp_range_based_for",
                         LangOpts.CPlusPlus1z ? "201603" : "200907");
@@ -517,9 +518,13 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts,
     Builder.defineMacro("__cpp_namespace_attributes", "201411");
     Builder.defineMacro("__cpp_enumerator_attributes", "201411");
     Builder.defineMacro("__cpp_nested_namespace_definitions", "201411");
+    Builder.defineMacro("__cpp_variadic_using", "201611");
     Builder.defineMacro("__cpp_aggregate_bases", "201603");
+    Builder.defineMacro("__cpp_structured_bindings", "201606");
     Builder.defineMacro("__cpp_nontype_template_args", "201411");
     Builder.defineMacro("__cpp_fold_expressions", "201603");
+    // FIXME: This is not yet listed in SD-6.
+    Builder.defineMacro("__cpp_deduction_guides", "201611");
   }
   if (LangOpts.AlignedAllocation)
     Builder.defineMacro("__cpp_aligned_new", "201606");
@@ -591,9 +596,6 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
       Builder.defineMacro("OBJC_ZEROCOST_EXCEPTIONS");
   }
 
-  Builder.defineMacro("__OBJC_BOOL_IS_BOOL",
-                      Twine(TI.useSignedCharForObjCBool() ? "0" : "1"));
-
   if (LangOpts.getGC() != LangOptions::NonGC)
     Builder.defineMacro("__OBJC_GC__");
 
@@ -624,6 +626,11 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
     Builder.defineMacro("IB_DESIGNABLE", "");
   }
 
+  // Define a macro that describes the Objective-C boolean type even for C
+  // and C++ since BOOL can be used from non Objective-C code.
+  Builder.defineMacro("__OBJC_BOOL_IS_BOOL",
+                      Twine(TI.useSignedCharForObjCBool() ? "0" : "1"));
+
   if (LangOpts.CPlusPlus)
     InitializeCPlusPlusFeatureTestMacros(LangOpts, Builder);
 
@@ -990,7 +997,7 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
   // OpenCL definitions.
   if (LangOpts.OpenCL) {
 #define OPENCLEXT(Ext) \
-  if (TI.getSupportedOpenCLOpts().is_##Ext##_supported( \
+  if (TI.getSupportedOpenCLOpts().isSupported(#Ext, \
       LangOpts.OpenCLVersion)) \
     Builder.defineMacro(#Ext);
 #include "clang/Basic/OpenCLExtensions.def"
diff --git a/tools/clang/lib/Index/USRGeneration.cpp b/tools/clang/lib/Index/USRGeneration.cpp
index 07fa953..141a7f4 100644
--- a/tools/clang/lib/Index/USRGeneration.cpp
+++ b/tools/clang/lib/Index/USRGeneration.cpp
@@ -310,7 +310,7 @@ void USRGenerator::VisitVarDecl(const VarDecl *D) {
   // For a template specialization, mangle the template arguments.
   if (const VarTemplateSpecializationDecl *Spec =
           dyn_cast<VarTemplateSpecializationDecl>(D)) {
-    const TemplateArgumentList &Args = Spec->getTemplateInstantiationArgs();
+    const TemplateArgumentList &Args = Spec->getTemplateArgs();
     Out << '>';
     for (unsigned I = 0, N = Args.size(); I != N; ++I) {
       Out << '#';
@@ -521,7 +521,7 @@ void USRGenerator::VisitTagDecl(const TagDecl *D) {
   // For a class template specialization, mangle the template arguments.
   if (const ClassTemplateSpecializationDecl *Spec =
           dyn_cast<ClassTemplateSpecializationDecl>(D)) {
-    const TemplateArgumentList &Args = Spec->getTemplateInstantiationArgs();
+    const TemplateArgumentList &Args = Spec->getTemplateArgs();
     Out << '>';
     for (unsigned I = 0, N = Args.size(); I != N; ++I) {
       Out << '#';
@@ -656,7 +656,6 @@ void USRGenerator::VisitType(QualType T) {
       case BuiltinType::OCLEvent:
       case BuiltinType::OCLClkEvent:
       case BuiltinType::OCLQueue:
-      case BuiltinType::OCLNDRange:
       case BuiltinType::OCLReserveID:
       case BuiltinType::OCLSampler:
         IgnoreResults = true;
@@ -913,21 +912,30 @@ bool clang::index::generateUSRForDecl(const Decl *D,
 bool clang::index::generateUSRForMacro(const MacroDefinitionRecord *MD,
                                        const SourceManager &SM,
                                        SmallVectorImpl<char> &Buf) {
+  if (!MD)
+    return true;
+  return generateUSRForMacro(MD->getName()->getName(), MD->getLocation(),
+                             SM, Buf);
+
+}
+
+bool clang::index::generateUSRForMacro(StringRef MacroName, SourceLocation Loc,
+                                       const SourceManager &SM,
+                                       SmallVectorImpl<char> &Buf) {
   // Don't generate USRs for things with invalid locations.
-  if (!MD || MD->getLocation().isInvalid())
+  if (MacroName.empty() || Loc.isInvalid())
     return true;
 
   llvm::raw_svector_ostream Out(Buf);
 
   // Assume that system headers are sane.  Don't put source location
   // information into the USR if the macro comes from a system header.
-  SourceLocation Loc = MD->getLocation();
   bool ShouldGenerateLocation = !SM.isInSystemHeader(Loc);
 
   Out << getUSRSpacePrefix();
   if (ShouldGenerateLocation)
     printLoc(Out, Loc, SM, /*IncludeOffset=*/true);
   Out << "@macro@";
-  Out << MD->getName()->getName();
+  Out << MacroName;
   return false;
 }
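// For illustration, derived from the code above: for a macro FOO defined in a
// user header, the generated USR has the shape
//   c:<location-prefix>@macro@FOO
// and the location prefix is dropped when the macro comes from a system header.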
diff --git a/tools/clang/lib/Parse/ParseDecl.cpp b/tools/clang/lib/Parse/ParseDecl.cpp
index aa212ac..4791ab0 100644
--- a/tools/clang/lib/Parse/ParseDecl.cpp
+++ b/tools/clang/lib/Parse/ParseDecl.cpp
@@ -177,8 +177,12 @@ void Parser::ParseGNUAttributes(ParsedAttributes &attrs,
     if (!ClassStack.empty() && !LateAttrs->parseSoon())
       getCurrentClass().LateParsedDeclarations.push_back(LA);
 
-    // consume everything up to and including the matching right parens
-    ConsumeAndStoreUntil(tok::r_paren, LA->Toks, true, false);
+    // Be sure ConsumeAndStoreUntil doesn't see the start l_paren, since it
+    // recursively consumes balanced parens.
+    LA->Toks.push_back(Tok);
+    ConsumeParen();
+    // Consume everything up to and including the matching right parens.
+    ConsumeAndStoreUntil(tok::r_paren, LA->Toks, /*StopAtSemi=*/true);
 
     Token Eof;
     Eof.startToken();
@@ -302,10 +306,11 @@ unsigned Parser::ParseAttributeArgsCommon(
 
   // Parse the non-empty comma-separated list of expressions.
   do {
-    bool ShouldEnter = attributeParsedArgsUnevaluated(*AttrName);
+    bool Uneval = attributeParsedArgsUnevaluated(*AttrName);
     EnterExpressionEvaluationContext Unevaluated(
-        Actions, Sema::Unevaluated, /*LambdaContextDecl=*/nullptr,
-        /*IsDecltype=*/false, ShouldEnter);
+        Actions, Uneval ? Sema::Unevaluated : Sema::ConstantEvaluated,
+        /*LambdaContextDecl=*/nullptr,
+        /*IsDecltype=*/false);
 
     ExprResult ArgExpr(
         Actions.CorrectDelayedTyposInExpr(ParseAssignmentExpression()));
@@ -351,6 +356,10 @@ void Parser::ParseGNUAttributeArgs(IdentifierInfo *AttrName,
     ParseAvailabilityAttribute(*AttrName, AttrNameLoc, Attrs, EndLoc,
                                ScopeName, ScopeLoc, Syntax);
     return;
+  } else if (AttrKind == AttributeList::AT_ExternalSourceSymbol) {
+    ParseExternalSourceSymbolAttribute(*AttrName, AttrNameLoc, Attrs, EndLoc,
+                                       ScopeName, ScopeLoc, Syntax);
+    return;
   } else if (AttrKind == AttributeList::AT_ObjCBridgeRelated) {
     ParseObjCBridgeRelatedAttribute(*AttrName, AttrNameLoc, Attrs, EndLoc,
                                     ScopeName, ScopeLoc, Syntax);
@@ -384,6 +393,25 @@ void Parser::ParseGNUAttributeArgs(IdentifierInfo *AttrName,
                            ScopeLoc, Syntax);
 }
 
+unsigned Parser::ParseClangAttributeArgs(
+    IdentifierInfo *AttrName, SourceLocation AttrNameLoc,
+    ParsedAttributes &Attrs, SourceLocation *EndLoc, IdentifierInfo *ScopeName,
+    SourceLocation ScopeLoc, AttributeList::Syntax Syntax) {
+  assert(Tok.is(tok::l_paren) && "Attribute arg list not starting with '('");
+
+  AttributeList::Kind AttrKind =
+      AttributeList::getKind(AttrName, ScopeName, Syntax);
+
+  if (AttrKind == AttributeList::AT_ExternalSourceSymbol) {
+    ParseExternalSourceSymbolAttribute(*AttrName, AttrNameLoc, Attrs, EndLoc,
+                                       ScopeName, ScopeLoc, Syntax);
+    return Attrs.getList() ? Attrs.getList()->getNumArgs() : 0;
+  }
+
+  return ParseAttributeArgsCommon(AttrName, AttrNameLoc, Attrs, EndLoc,
+                                  ScopeName, ScopeLoc, Syntax);
+}
+
 bool Parser::ParseMicrosoftDeclSpecArgs(IdentifierInfo *AttrName,
                                         SourceLocation AttrNameLoc,
                                         ParsedAttributes &Attrs) {
@@ -1059,6 +1087,119 @@ void Parser::ParseAvailabilityAttribute(IdentifierInfo &Availability,
                                 Syntax, StrictLoc, ReplacementExpr.get());
 }
 
+/// \brief Parse the contents of the "external_source_symbol" attribute.
+///
+/// external-source-symbol-attribute:
+///   'external_source_symbol' '(' keyword-arg-list ')'
+///
+/// keyword-arg-list:
+///   keyword-arg
+///   keyword-arg ',' keyword-arg-list
+///
+/// keyword-arg:
+///   'language' '=' <string-literal>
+///   'defined_in' '=' <string-literal>
+///   'generated_declaration'
+void Parser::ParseExternalSourceSymbolAttribute(
+    IdentifierInfo &ExternalSourceSymbol, SourceLocation Loc,
+    ParsedAttributes &Attrs, SourceLocation *EndLoc, IdentifierInfo *ScopeName,
+    SourceLocation ScopeLoc, AttributeList::Syntax Syntax) {
+  // Opening '('.
+  BalancedDelimiterTracker T(*this, tok::l_paren);
+  if (T.expectAndConsume())
+    return;
+
+  // Initialize the pointers for the keyword identifiers when required.
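+  // For illustration, a use of the attribute this function parses might look
+  // like (hypothetical declaration):
+  //   void f() __attribute__((external_source_symbol(
+  //       language="Swift", defined_in="module", generated_declaration)));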
+ if (!Ident_language) { + Ident_language = PP.getIdentifierInfo("language"); + Ident_defined_in = PP.getIdentifierInfo("defined_in"); + Ident_generated_declaration = PP.getIdentifierInfo("generated_declaration"); + } + + ExprResult Language; + bool HasLanguage = false; + ExprResult DefinedInExpr; + bool HasDefinedIn = false; + IdentifierLoc *GeneratedDeclaration = nullptr; + + // Parse the language/defined_in/generated_declaration keywords + do { + if (Tok.isNot(tok::identifier)) { + Diag(Tok, diag::err_external_source_symbol_expected_keyword); + SkipUntil(tok::r_paren, StopAtSemi); + return; + } + + SourceLocation KeywordLoc = Tok.getLocation(); + IdentifierInfo *Keyword = Tok.getIdentifierInfo(); + if (Keyword == Ident_generated_declaration) { + if (GeneratedDeclaration) { + Diag(Tok, diag::err_external_source_symbol_duplicate_clause) << Keyword; + SkipUntil(tok::r_paren, StopAtSemi); + return; + } + GeneratedDeclaration = ParseIdentifierLoc(); + continue; + } + + if (Keyword != Ident_language && Keyword != Ident_defined_in) { + Diag(Tok, diag::err_external_source_symbol_expected_keyword); + SkipUntil(tok::r_paren, StopAtSemi); + return; + } + + ConsumeToken(); + if (ExpectAndConsume(tok::equal, diag::err_expected_after, + Keyword->getName())) { + SkipUntil(tok::r_paren, StopAtSemi); + return; + } + + bool HadLanguage = HasLanguage, HadDefinedIn = HasDefinedIn; + if (Keyword == Ident_language) + HasLanguage = true; + else + HasDefinedIn = true; + + if (Tok.isNot(tok::string_literal)) { + Diag(Tok, diag::err_expected_string_literal) + << /*Source='external_source_symbol attribute'*/ 3 + << /*language | source container*/ (Keyword != Ident_language); + SkipUntil(tok::comma, tok::r_paren, StopAtSemi | StopBeforeMatch); + continue; + } + if (Keyword == Ident_language) { + if (HadLanguage) { + Diag(KeywordLoc, diag::err_external_source_symbol_duplicate_clause) + << Keyword; + ParseStringLiteralExpression(); + continue; + } + Language = ParseStringLiteralExpression(); + } else { + assert(Keyword == Ident_defined_in && "Invalid clause keyword!"); + if (HadDefinedIn) { + Diag(KeywordLoc, diag::err_external_source_symbol_duplicate_clause) + << Keyword; + ParseStringLiteralExpression(); + continue; + } + DefinedInExpr = ParseStringLiteralExpression(); + } + } while (TryConsumeToken(tok::comma)); + + // Closing ')'. + if (T.consumeClose()) + return; + if (EndLoc) + *EndLoc = T.getCloseLocation(); + + ArgsUnion Args[] = {Language.get(), DefinedInExpr.get(), + GeneratedDeclaration}; + Attrs.addNew(&ExternalSourceSymbol, SourceRange(Loc, T.getCloseLocation()), + ScopeName, ScopeLoc, Args, llvm::array_lengthof(Args), Syntax); +} + /// \brief Parse the contents of the "objc_bridge_related" attribute. 
/// objc_bridge_related '(' related_class ',' opt-class_method ',' opt-instance_method ')' /// related_class: @@ -1506,7 +1647,6 @@ Parser::DeclGroupPtrTy Parser::ParseDeclaration(unsigned Context, ObjCDeclContextSwitch ObjCDC(*this); Decl *SingleDecl = nullptr; - Decl *OwnedType = nullptr; switch (Tok.getKind()) { case tok::kw_template: case tok::kw_export: @@ -1526,9 +1666,8 @@ Parser::DeclGroupPtrTy Parser::ParseDeclaration(unsigned Context, ProhibitAttributes(attrs); return ParseNamespace(Context, DeclEnd); case tok::kw_using: - SingleDecl = ParseUsingDirectiveOrDeclaration(Context, ParsedTemplateInfo(), - DeclEnd, attrs, &OwnedType); - break; + return ParseUsingDirectiveOrDeclaration(Context, ParsedTemplateInfo(), + DeclEnd, attrs); case tok::kw_static_assert: case tok::kw__Static_assert: ProhibitAttributes(attrs); @@ -1539,9 +1678,8 @@ Parser::DeclGroupPtrTy Parser::ParseDeclaration(unsigned Context, } // This routine returns a DeclGroup, if the thing we parsed only contains a - // single decl, convert it now. Alias declarations can also declare a type; - // include that too if it is present. - return Actions.ConvertDeclToDeclGroup(SingleDecl, OwnedType); + // single decl, convert it now. + return Actions.ConvertDeclToDeclGroup(SingleDecl); } /// simple-declaration: [C99 6.7: declaration] [C++ 7p1: dcl.dcl] @@ -1589,7 +1727,7 @@ Parser::ParseSimpleDeclaration(unsigned Context, DS.complete(TheDecl); if (AnonRecord) { Decl* decls[] = {AnonRecord, TheDecl}; - return Actions.BuildDeclaratorGroup(decls, /*TypeMayContainAuto=*/false); + return Actions.BuildDeclaratorGroup(decls); } return Actions.ConvertDeclToDeclGroup(TheDecl); } @@ -2043,8 +2181,6 @@ Decl *Parser::ParseDeclarationAfterDeclaratorAndAttributes( } } - bool TypeContainsAuto = D.getDeclSpec().containsPlaceholderType(); - // Parse declarator '=' initializer. // If a '==' or '+=' is found, suggest a fixit to '='. 
if (isTokenEqualOrEqualTypo()) { @@ -2104,7 +2240,7 @@ Decl *Parser::ParseDeclarationAfterDeclaratorAndAttributes( Actions.ActOnInitializerError(ThisDecl); } else Actions.AddInitializerToDecl(ThisDecl, Init.get(), - /*DirectInit=*/false, TypeContainsAuto); + /*DirectInit=*/false); } } else if (Tok.is(tok::l_paren)) { // Parse C++ direct initializer: '(' expression-list ')' @@ -2147,7 +2283,7 @@ Decl *Parser::ParseDeclarationAfterDeclaratorAndAttributes( T.getCloseLocation(), Exprs); Actions.AddInitializerToDecl(ThisDecl, Initializer.get(), - /*DirectInit=*/true, TypeContainsAuto); + /*DirectInit=*/true); } } else if (getLangOpts().CPlusPlus11 && Tok.is(tok::l_brace) && (!CurParsedObjCImpl || !D.isFunctionDeclarator())) { @@ -2169,11 +2305,10 @@ Decl *Parser::ParseDeclarationAfterDeclaratorAndAttributes( if (Init.isInvalid()) { Actions.ActOnInitializerError(ThisDecl); } else - Actions.AddInitializerToDecl(ThisDecl, Init.get(), - /*DirectInit=*/true, TypeContainsAuto); + Actions.AddInitializerToDecl(ThisDecl, Init.get(), /*DirectInit=*/true); } else { - Actions.ActOnUninitializedDecl(ThisDecl, TypeContainsAuto); + Actions.ActOnUninitializedDecl(ThisDecl); } Actions.FinalizeDeclaration(ThisDecl); @@ -2825,44 +2960,23 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, ->Kind == TNK_Type_template) { // We have a qualified template-id, e.g., N::A - // C++ [class.qual]p2: - // In a lookup in which the constructor is an acceptable lookup - // result and the nested-name-specifier nominates a class C: - // - // - if the name specified after the - // nested-name-specifier, when looked up in C, is the - // injected-class-name of C (Clause 9), or - // - // - if the name specified after the nested-name-specifier - // is the same as the identifier or the - // simple-template-id's template-name in the last - // component of the nested-name-specifier, - // - // the name is instead considered to name the constructor of - // class C. + // If this would be a valid constructor declaration with template + // arguments, we will reject the attempt to form an invalid type-id + // referring to the injected-class-name when we annotate the token, + // per C++ [class.qual]p2. // - // Thus, if the template-name is actually the constructor - // name, then the code is ill-formed; this interpretation is - // reinforced by the NAD status of core issue 635. + // To improve diagnostics for this case, parse the declaration as a + // constructor (and reject the extra template arguments later). TemplateIdAnnotation *TemplateId = takeTemplateIdAnnotation(Next); if ((DSContext == DSC_top_level || DSContext == DSC_class) && TemplateId->Name && - Actions.isCurrentClassName(*TemplateId->Name, getCurScope(), &SS)) { - if (isConstructorDeclarator(/*Unqualified*/false)) { - // The user meant this to be an out-of-line constructor - // definition, but template arguments are not allowed - // there. Just allow this as a constructor; we'll - // complain about it later. - goto DoneWithDeclSpec; - } - - // The user meant this to name a type, but it actually names - // a constructor with some extraneous template - // arguments. Complain, then parse it as a type as the user - // intended. 
- Diag(TemplateId->TemplateNameLoc, - diag::err_out_of_line_template_id_type_names_constructor) - << TemplateId->Name << 0 /* template name */; + Actions.isCurrentClassName(*TemplateId->Name, getCurScope(), &SS) && + isConstructorDeclarator(/*Unqualified*/false)) { + // The user meant this to be an out-of-line constructor + // definition, but template arguments are not allowed + // there. Just allow this as a constructor; we'll + // complain about it later. + goto DoneWithDeclSpec; } DS.getTypeSpecScope() = SS; @@ -2893,30 +3007,21 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, if (Next.isNot(tok::identifier)) goto DoneWithDeclSpec; - // If we're in a context where the identifier could be a class name, - // check whether this is a constructor declaration. + // Check whether this is a constructor declaration. If we're in a + // context where the identifier could be a class name, and it has the + // shape of a constructor declaration, process it as one. if ((DSContext == DSC_top_level || DSContext == DSC_class) && Actions.isCurrentClassName(*Next.getIdentifierInfo(), getCurScope(), - &SS)) { - if (isConstructorDeclarator(/*Unqualified*/false)) - goto DoneWithDeclSpec; - - // As noted in C++ [class.qual]p2 (cited above), when the name - // of the class is qualified in a context where it could name - // a constructor, its a constructor name. However, we've - // looked at the declarator, and the user probably meant this - // to be a type. Complain that it isn't supposed to be treated - // as a type, then proceed to parse it as a type. - Diag(Next.getLocation(), - diag::err_out_of_line_template_id_type_names_constructor) - << Next.getIdentifierInfo() << 1 /* type */; - } + &SS) && + isConstructorDeclarator(/*Unqualified*/ false)) + goto DoneWithDeclSpec; ParsedType TypeRep = Actions.getTypeName(*Next.getIdentifierInfo(), Next.getLocation(), getCurScope(), &SS, false, false, nullptr, /*IsCtorOrDtorName=*/false, - /*NonTrivialSourceInfo=*/true); + /*WantNonTrivialSourceInfo=*/true, + isClassTemplateDeductionContext(DSContext)); // If the referenced identifier is not a type, then this declspec is // erroneous: We already checked about that it has no type specifier, and @@ -2997,6 +3102,31 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, if (DS.hasTypeSpecifier()) goto DoneWithDeclSpec; + // If the token is an identifier named "__declspec" and Microsoft + // extensions are not enabled, it is likely that there will be cascading + // parse errors if this really is a __declspec attribute. Attempt to + // recognize that scenario and recover gracefully. + if (!getLangOpts().DeclSpecKeyword && Tok.is(tok::identifier) && + Tok.getIdentifierInfo()->getName().equals("__declspec")) { + Diag(Loc, diag::err_ms_attributes_not_enabled); + + // The next token should be an open paren. If it is, eat the entire + // attribute declaration and continue. + if (NextToken().is(tok::l_paren)) { + // Consume the __declspec identifier. + ConsumeToken(); + + // Eat the parens and everything between them. + BalancedDelimiterTracker T(*this, tok::l_paren); + if (T.consumeOpen()) { + assert(false && "Not a left paren?"); + return; + } + T.skipToEnd(); + continue; + } + } + // In C++, check to see if this is a scope specifier like foo::bar::, if // so handle it as such. This is important for ctor parsing. 
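+      // For illustration, the __declspec recovery above turns a hypothetical
+      //   __declspec(dllexport) int f();
+      // (compiled without -fdeclspec or -fms-extensions) into a single
+      // err_ms_attributes_not_enabled diagnostic followed by a clean parse of
+      // "int f();" instead of a cascade of errors.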
if (getLangOpts().CPlusPlus) { @@ -3030,9 +3160,10 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, continue; } - ParsedType TypeRep = - Actions.getTypeName(*Tok.getIdentifierInfo(), - Tok.getLocation(), getCurScope()); + ParsedType TypeRep = Actions.getTypeName( + *Tok.getIdentifierInfo(), Tok.getLocation(), getCurScope(), nullptr, + false, false, nullptr, false, false, + isClassTemplateDeductionContext(DSContext)); // If this is not a typedef name, don't parse it as part of the declspec, // it must be an implicit int or an error. @@ -3055,6 +3186,16 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, isConstructorDeclarator(/*Unqualified*/true)) goto DoneWithDeclSpec; + // Likewise, if this is a context where the identifier could be a template + // name, check whether this is a deduction guide declaration. + if (getLangOpts().CPlusPlus1z && + (DSContext == DSC_class || DSContext == DSC_top_level) && + Actions.isDeductionGuideName(getCurScope(), *Tok.getIdentifierInfo(), + Tok.getLocation()) && + isConstructorDeclarator(/*Unqualified*/ true, + /*DeductionGuide*/ true)) + goto DoneWithDeclSpec; + isInvalid = DS.SetTypeSpecType(DeclSpec::TST_typename, Loc, PrevSpec, DiagID, TypeRep, Policy); if (isInvalid) @@ -4681,7 +4822,7 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) { } } -bool Parser::isConstructorDeclarator(bool IsUnqualified) { +bool Parser::isConstructorDeclarator(bool IsUnqualified, bool DeductionGuide) { TentativeParsingAction TPA(*this); // Parse the C++ scope specifier. @@ -4702,6 +4843,10 @@ bool Parser::isConstructorDeclarator(bool IsUnqualified) { return false; } + // There may be attributes here, appertaining to the constructor name or type + // we just stepped past. + SkipCXX11Attributes(); + // Current class name must be followed by a left parenthesis. if (Tok.isNot(tok::l_paren)) { TPA.Revert(); @@ -4769,13 +4914,24 @@ bool Parser::isConstructorDeclarator(bool IsUnqualified) { case tok::r_paren: // C(X ) - if (NextToken().is(tok::colon) || NextToken().is(tok::kw_try)) { + + // Skip past the right-paren and any following attributes to get to + // the function body or trailing-return-type. + ConsumeParen(); + SkipCXX11Attributes(); + + if (DeductionGuide) { + // C(X) -> ... is a deduction guide. + IsConstructor = Tok.is(tok::arrow); + break; + } + if (Tok.is(tok::colon) || Tok.is(tok::kw_try)) { // Assume these were meant to be constructors: // C(X) : (the name of a bit-field cannot be parenthesized). // C(X) try (this is otherwise ill-formed). IsConstructor = true; } - if (NextToken().is(tok::semi) || NextToken().is(tok::l_brace)) { + if (Tok.is(tok::semi) || Tok.is(tok::l_brace)) { // If we have a constructor name within the class definition, // assume these were meant to be constructors: // C(X) { @@ -4786,7 +4942,7 @@ bool Parser::isConstructorDeclarator(bool IsUnqualified) { // // FIXME: We can actually do this whether or not the name is qualified, // because if it is qualified in this context it must be being used as - // a constructor name. However, we do not implement that rule correctly + // a constructor name. // currently, so we're somewhat conservative here. IsConstructor = IsUnqualified; } @@ -4814,9 +4970,10 @@ bool Parser::isConstructorDeclarator(bool IsUnqualified) { /// [ only if AttReqs & AR_CXX11AttributesParsed ] /// Note: vendor can be GNU, MS, etc and can be explicitly controlled via /// AttrRequirements bitmask values. 
-void Parser::ParseTypeQualifierListOpt(DeclSpec &DS, unsigned AttrReqs, - bool AtomicAllowed, - bool IdentifierRequired) { +void Parser::ParseTypeQualifierListOpt( + DeclSpec &DS, unsigned AttrReqs, bool AtomicAllowed, + bool IdentifierRequired, + Optional> CodeCompletionHandler) { if (getLangOpts().CPlusPlus11 && (AttrReqs & AR_CXX11AttributesParsed) && isCXX11AttributeSpecifier()) { ParsedAttributesWithRange attrs(AttrFactory); @@ -4834,7 +4991,10 @@ void Parser::ParseTypeQualifierListOpt(DeclSpec &DS, unsigned AttrReqs, switch (Tok.getKind()) { case tok::code_completion: - Actions.CodeCompleteTypeQualifiers(DS); + if (CodeCompletionHandler) + (*CodeCompletionHandler)(); + else + Actions.CodeCompleteTypeQualifiers(DS); return cutOffParsing(); case tok::kw_const: @@ -5317,21 +5477,29 @@ void Parser::ParseDirectDeclarator(Declarator &D) { // We found something that indicates the start of an unqualified-id. // Parse that unqualified-id. bool AllowConstructorName; - if (D.getDeclSpec().hasTypeSpecifier()) + bool AllowDeductionGuide; + if (D.getDeclSpec().hasTypeSpecifier()) { AllowConstructorName = false; - else if (D.getCXXScopeSpec().isSet()) + AllowDeductionGuide = false; + } else if (D.getCXXScopeSpec().isSet()) { AllowConstructorName = (D.getContext() == Declarator::FileContext || D.getContext() == Declarator::MemberContext); - else + AllowDeductionGuide = false; + } else { AllowConstructorName = (D.getContext() == Declarator::MemberContext); + AllowDeductionGuide = + (D.getContext() == Declarator::FileContext || + D.getContext() == Declarator::MemberContext); + } SourceLocation TemplateKWLoc; bool HadScope = D.getCXXScopeSpec().isValid(); if (ParseUnqualifiedId(D.getCXXScopeSpec(), /*EnteringContext=*/true, /*AllowDestructorName=*/true, AllowConstructorName, - nullptr, TemplateKWLoc, D.getName()) || + AllowDeductionGuide, nullptr, TemplateKWLoc, + D.getName()) || // Once we're past the identifier, if the scope was bad, mark the // whole declarator bad. D.getCXXScopeSpec().isInvalid()) { @@ -5752,7 +5920,11 @@ void Parser::ParseFunctionDeclarator(Declarator &D, // Parse cv-qualifier-seq[opt]. ParseTypeQualifierListOpt(DS, AR_NoAttributesParsed, - /*AtomicAllowed*/ false); + /*AtomicAllowed*/ false, + /*IdentifierRequired=*/false, + llvm::function_ref([&]() { + Actions.CodeCompleteFunctionQualifiers(DS, D); + })); if (!DS.getSourceRange().getEnd().isInvalid()) { EndLoc = DS.getSourceRange().getEnd(); ConstQualifierLoc = DS.getConstSpecLoc(); diff --git a/tools/clang/lib/Parse/ParseExprCXX.cpp b/tools/clang/lib/Parse/ParseExprCXX.cpp index 071afdd..bf741c2 100644 --- a/tools/clang/lib/Parse/ParseExprCXX.cpp +++ b/tools/clang/lib/Parse/ParseExprCXX.cpp @@ -216,7 +216,10 @@ bool Parser::ParseOptionalCXXScopeSpecifier(CXXScopeSpec &SS, SourceLocation EndLoc = ParseDecltypeSpecifier(DS); SourceLocation CCLoc; - if (!TryConsumeToken(tok::coloncolon, CCLoc)) { + // Work around a standard defect: 'decltype(auto)::' is not a + // nested-name-specifier. + if (DS.getTypeSpecType() == DeclSpec::TST_decltype_auto || + !TryConsumeToken(tok::coloncolon, CCLoc)) { AnnotateExistingDecltypeSpecifier(DS, DeclLoc, EndLoc); return false; } @@ -310,11 +313,9 @@ bool Parser::ParseOptionalCXXScopeSpecifier(CXXScopeSpec &SS, // Commit to parsing the template-id. 
TPA.Commit(); TemplateTy Template; - if (TemplateNameKind TNK - = Actions.ActOnDependentTemplateName(getCurScope(), - SS, TemplateKWLoc, TemplateName, - ObjectType, EnteringContext, - Template)) { + if (TemplateNameKind TNK = Actions.ActOnDependentTemplateName( + getCurScope(), SS, TemplateKWLoc, TemplateName, ObjectType, + EnteringContext, Template, /*AllowInjectedClassName*/ true)) { if (AnnotateTemplateIdToken(Template, TNK, SS, TemplateKWLoc, TemplateName, false)) return true; @@ -509,12 +510,10 @@ bool Parser::ParseOptionalCXXScopeSpecifier(CXXScopeSpec &SS, Diag(Tok.getLocation(), DiagID) << II.getName() << FixItHint::CreateInsertion(Tok.getLocation(), "template "); - - if (TemplateNameKind TNK - = Actions.ActOnDependentTemplateName(getCurScope(), - SS, SourceLocation(), - TemplateName, ObjectType, - EnteringContext, Template)) { + + if (TemplateNameKind TNK = Actions.ActOnDependentTemplateName( + getCurScope(), SS, SourceLocation(), TemplateName, ObjectType, + EnteringContext, Template, /*AllowInjectedClassName*/ true)) { // Consume the identifier. ConsumeToken(); if (AnnotateTemplateIdToken(Template, TNK, SS, SourceLocation(), @@ -550,6 +549,7 @@ ExprResult Parser::tryParseCXXIdExpression(CXXScopeSpec &SS, bool isAddressOfOpe /*EnteringContext=*/false, /*AllowDestructorName=*/false, /*AllowConstructorName=*/false, + /*AllowDeductionGuide=*/false, /*ObjectType=*/nullptr, TemplateKWLoc, Name)) return ExprError(); @@ -735,7 +735,7 @@ ExprResult Parser::TryParseLambdaExpression() { /// sometimes skip the initializers for init-captures and not fully /// populate \p Intro. This flag will be set to \c true if we do so. /// \return A DiagnosticID if it hit something unexpected. The location for -/// for the diagnostic is that of the current token. +/// the diagnostic is that of the current token. Optional Parser::ParseLambdaIntroducer(LambdaIntroducer &Intro, bool *SkippedInits) { typedef Optional DiagResult; @@ -902,6 +902,8 @@ Optional Parser::ParseLambdaIntroducer(LambdaIntroducer &Intro, SourceLocation StartLoc = Tok.getLocation(); InMessageExpressionRAIIObject MaybeInMessageExpression(*this, true); Init = ParseInitializer(); + if (!Init.isInvalid()) + Init = Actions.CorrectDelayedTyposInExpr(Init.get()); if (Tok.getLocation() != StartLoc) { // Back out the lexing of the token after the initializer. @@ -1641,9 +1643,10 @@ ExprResult Parser::ParseCXXThis() { /// typename-specifier '(' expression-list[opt] ')' /// [C++0x] typename-specifier braced-init-list /// +/// In C++1z onwards, the type specifier can also be a template-name. 
ExprResult Parser::ParseCXXTypeConstructExpression(const DeclSpec &DS) { - Declarator DeclaratorInfo(DS, Declarator::TypeNameContext); + Declarator DeclaratorInfo(DS, Declarator::FunctionalCastContext); ParsedType TypeRep = Actions.ActOnTypeName(getCurScope(), DeclaratorInfo).get(); assert((Tok.is(tok::l_paren) || @@ -1816,8 +1819,7 @@ Sema::ConditionResult Parser::ParseCXXCondition(StmtResult *InitStmt, } if (!InitExpr.isInvalid()) - Actions.AddInitializerToDecl(DeclOut, InitExpr.get(), !CopyInitialization, - DS.containsPlaceholderType()); + Actions.AddInitializerToDecl(DeclOut, InitExpr.get(), !CopyInitialization); else Actions.ActOnInitializerError(DeclOut); @@ -2022,9 +2024,11 @@ bool Parser::ParseUnqualifiedIdTemplateId(CXXScopeSpec &SS, case UnqualifiedId::IK_OperatorFunctionId: case UnqualifiedId::IK_LiteralOperatorId: if (AssumeTemplateId) { - TNK = Actions.ActOnDependentTemplateName(getCurScope(), SS, TemplateKWLoc, - Id, ObjectType, EnteringContext, - Template); + // We defer the injected-class-name checks until we've found whether + // this template-id is used to form a nested-name-specifier or not. + TNK = Actions.ActOnDependentTemplateName( + getCurScope(), SS, TemplateKWLoc, Id, ObjectType, EnteringContext, + Template, /*AllowInjectedClassName*/ true); if (TNK == TNK_Non_template) return true; } else { @@ -2053,10 +2057,9 @@ bool Parser::ParseUnqualifiedIdTemplateId(CXXScopeSpec &SS, Diag(Id.StartLocation, diag::err_missing_dependent_template_keyword) << Name << FixItHint::CreateInsertion(Id.StartLocation, "template "); - TNK = Actions.ActOnDependentTemplateName(getCurScope(), - SS, TemplateKWLoc, Id, - ObjectType, EnteringContext, - Template); + TNK = Actions.ActOnDependentTemplateName( + getCurScope(), SS, TemplateKWLoc, Id, ObjectType, EnteringContext, + Template, /*AllowInjectedClassName*/ true); if (TNK == TNK_Non_template) return true; } @@ -2079,10 +2082,9 @@ bool Parser::ParseUnqualifiedIdTemplateId(CXXScopeSpec &SS, bool MemberOfUnknownSpecialization; TemplateName.setIdentifier(Name, NameLoc); if (ObjectType) { - TNK = Actions.ActOnDependentTemplateName(getCurScope(), - SS, TemplateKWLoc, TemplateName, - ObjectType, EnteringContext, - Template); + TNK = Actions.ActOnDependentTemplateName( + getCurScope(), SS, TemplateKWLoc, TemplateName, ObjectType, + EnteringContext, Template, /*AllowInjectedClassName*/ true); if (TNK == TNK_Non_template) return true; } else { @@ -2157,7 +2159,7 @@ bool Parser::ParseUnqualifiedIdTemplateId(CXXScopeSpec &SS, // Constructor and destructor names. TypeResult Type = Actions.ActOnTemplateIdType(SS, TemplateKWLoc, - Template, NameLoc, + Template, Name, NameLoc, LAngleLoc, TemplateArgsPtr, RAngleLoc, /*IsCtorOrDtorName=*/true); if (Type.isInvalid()) @@ -2434,6 +2436,8 @@ bool Parser::ParseUnqualifiedIdOperator(CXXScopeSpec &SS, bool EnteringContext, /// /// \param AllowConstructorName whether we allow parsing a constructor name. /// +/// \param AllowDeductionGuide whether we allow parsing a deduction guide name. +/// /// \param ObjectType if this unqualified-id occurs within a member access /// expression, the type of the base object whose member is being accessed. 
/// @@ -2443,6 +2447,7 @@ bool Parser::ParseUnqualifiedIdOperator(CXXScopeSpec &SS, bool EnteringContext, bool Parser::ParseUnqualifiedId(CXXScopeSpec &SS, bool EnteringContext, bool AllowDestructorName, bool AllowConstructorName, + bool AllowDeductionGuide, ParsedType ObjectType, SourceLocation& TemplateKWLoc, UnqualifiedId &Result) { @@ -2471,6 +2476,7 @@ bool Parser::ParseUnqualifiedId(CXXScopeSpec &SS, bool EnteringContext, return false; } + ParsedTemplateTy TemplateName; if (AllowConstructorName && Actions.isCurrentClassName(*Id, getCurScope(), &SS)) { // We have parsed a constructor name. @@ -2479,6 +2485,12 @@ bool Parser::ParseUnqualifiedId(CXXScopeSpec &SS, bool EnteringContext, /*IsCtorOrDtorName=*/true, /*NonTrivialTypeSourceInfo=*/true); Result.setConstructorName(Ty, IdLoc, IdLoc); + } else if (getLangOpts().CPlusPlus1z && + AllowDeductionGuide && SS.isEmpty() && + Actions.isDeductionGuideName(getCurScope(), *Id, IdLoc, + &TemplateName)) { + // We have parsed a template-name naming a deduction guide. + Result.setDeductionGuideName(TemplateName, IdLoc); } else { // We have parsed an identifier. Result.setIdentifier(Id, IdLoc); @@ -2571,7 +2583,8 @@ bool Parser::ParseUnqualifiedId(CXXScopeSpec &SS, bool EnteringContext, if (SS.isEmpty() && Tok.is(tok::kw_decltype)) { DeclSpec DS(AttrFactory); SourceLocation EndLoc = ParseDecltypeSpecifier(DS); - if (ParsedType Type = Actions.getDestructorType(DS, ObjectType)) { + if (ParsedType Type = + Actions.getDestructorTypeForDecltype(DS, ObjectType)) { Result.setDestructorName(TildeLoc, Type, EndLoc); return false; } diff --git a/tools/clang/lib/Sema/DeclSpec.cpp b/tools/clang/lib/Sema/DeclSpec.cpp index d777311..da2fc11 100644 --- a/tools/clang/lib/Sema/DeclSpec.cpp +++ b/tools/clang/lib/Sema/DeclSpec.cpp @@ -560,7 +560,7 @@ bool DeclSpec::SetStorageClassSpec(Sema &S, SCS SC, SourceLocation Loc, // OpenCL v1.2 s6.8 changes this to "The auto and register storage-class // specifiers are not supported." if (S.getLangOpts().OpenCL && - !S.getOpenCLOptions().cl_clang_storage_class_specifiers) { + !S.getOpenCLOptions().isEnabled("cl_clang_storage_class_specifiers")) { switch (SC) { case SCS_extern: case SCS_private_extern: diff --git a/tools/clang/lib/Sema/Sema.cpp b/tools/clang/lib/Sema/Sema.cpp index eb53a70..4ee05b3 100644 --- a/tools/clang/lib/Sema/Sema.cpp +++ b/tools/clang/lib/Sema/Sema.cpp @@ -220,44 +220,74 @@ void Sema::Initialize() { addImplicitTypedef("size_t", Context.getSizeType()); } - // Initialize predefined OpenCL types and supported optional core features. + // Initialize predefined OpenCL types and supported extensions and (optional) + // core features. 
if (getLangOpts().OpenCL) { -#define OPENCLEXT(Ext) \ - if (Context.getTargetInfo().getSupportedOpenCLOpts().is_##Ext##_supported_core( \ - getLangOpts().OpenCLVersion)) \ - getOpenCLOptions().Ext = 1; -#include "clang/Basic/OpenCLExtensions.def" - + getOpenCLOptions().addSupport(Context.getTargetInfo().getSupportedOpenCLOpts()); + getOpenCLOptions().enableSupportedCore(getLangOpts().OpenCLVersion); addImplicitTypedef("sampler_t", Context.OCLSamplerTy); addImplicitTypedef("event_t", Context.OCLEventTy); if (getLangOpts().OpenCLVersion >= 200) { addImplicitTypedef("clk_event_t", Context.OCLClkEventTy); addImplicitTypedef("queue_t", Context.OCLQueueTy); - addImplicitTypedef("ndrange_t", Context.OCLNDRangeTy); addImplicitTypedef("reserve_id_t", Context.OCLReserveIDTy); addImplicitTypedef("atomic_int", Context.getAtomicType(Context.IntTy)); addImplicitTypedef("atomic_uint", Context.getAtomicType(Context.UnsignedIntTy)); - addImplicitTypedef("atomic_long", Context.getAtomicType(Context.LongTy)); - addImplicitTypedef("atomic_ulong", - Context.getAtomicType(Context.UnsignedLongTy)); + auto AtomicLongT = Context.getAtomicType(Context.LongTy); + addImplicitTypedef("atomic_long", AtomicLongT); + auto AtomicULongT = Context.getAtomicType(Context.UnsignedLongTy); + addImplicitTypedef("atomic_ulong", AtomicULongT); addImplicitTypedef("atomic_float", Context.getAtomicType(Context.FloatTy)); - addImplicitTypedef("atomic_double", - Context.getAtomicType(Context.DoubleTy)); + auto AtomicDoubleT = Context.getAtomicType(Context.DoubleTy); + addImplicitTypedef("atomic_double", AtomicDoubleT); // OpenCLC v2.0, s6.13.11.6 requires that atomic_flag is implemented as // 32-bit integer and OpenCLC v2.0, s6.1.1 int is always 32-bit wide. addImplicitTypedef("atomic_flag", Context.getAtomicType(Context.IntTy)); - addImplicitTypedef("atomic_intptr_t", - Context.getAtomicType(Context.getIntPtrType())); - addImplicitTypedef("atomic_uintptr_t", - Context.getAtomicType(Context.getUIntPtrType())); - addImplicitTypedef("atomic_size_t", - Context.getAtomicType(Context.getSizeType())); - addImplicitTypedef("atomic_ptrdiff_t", - Context.getAtomicType(Context.getPointerDiffType())); + auto AtomicIntPtrT = Context.getAtomicType(Context.getIntPtrType()); + addImplicitTypedef("atomic_intptr_t", AtomicIntPtrT); + auto AtomicUIntPtrT = Context.getAtomicType(Context.getUIntPtrType()); + addImplicitTypedef("atomic_uintptr_t", AtomicUIntPtrT); + auto AtomicSizeT = Context.getAtomicType(Context.getSizeType()); + addImplicitTypedef("atomic_size_t", AtomicSizeT); + auto AtomicPtrDiffT = Context.getAtomicType(Context.getPointerDiffType()); + addImplicitTypedef("atomic_ptrdiff_t", AtomicPtrDiffT); + + // OpenCL v2.0 s6.13.11.6: + // - The atomic_long and atomic_ulong types are supported if the + // cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics + // extensions are supported. + // - The atomic_double type is only supported if double precision + // is supported and the cl_khr_int64_base_atomics and + // cl_khr_int64_extended_atomics extensions are supported. + // - If the device address space is 64-bits, the data types + // atomic_intptr_t, atomic_uintptr_t, atomic_size_t and + // atomic_ptrdiff_t are supported if the cl_khr_int64_base_atomics and + // cl_khr_int64_extended_atomics extensions are supported. 
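+      // (Illustration: a kernel opts into these with the standard pragma,
+      // e.g.
+      //   #pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
+      //   #pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable
+      // before using atomic_long.)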
+      std::vector<QualType> Atomic64BitTypes;
+      Atomic64BitTypes.push_back(AtomicLongT);
+      Atomic64BitTypes.push_back(AtomicULongT);
+      Atomic64BitTypes.push_back(AtomicDoubleT);
+      if (Context.getTypeSize(AtomicSizeT) == 64) {
+        Atomic64BitTypes.push_back(AtomicSizeT);
+        Atomic64BitTypes.push_back(AtomicIntPtrT);
+        Atomic64BitTypes.push_back(AtomicUIntPtrT);
+        Atomic64BitTypes.push_back(AtomicPtrDiffT);
+      }
+      for (auto &I : Atomic64BitTypes)
+        setOpenCLExtensionForType(I,
+            "cl_khr_int64_base_atomics cl_khr_int64_extended_atomics");
+
+      setOpenCLExtensionForType(AtomicDoubleT, "cl_khr_fp64");
     }
-  }
+
+    setOpenCLExtensionForType(Context.DoubleTy, "cl_khr_fp64");
+
+#define GENERIC_IMAGE_TYPE_EXT(Type, Id, Ext) \
+  setOpenCLExtensionForType(Context.Id, Ext);
+#include "clang/Basic/OpenCLImageTypes.def"
+  };
 
   if (Context.getTargetInfo().hasBuiltinMSVaList()) {
     DeclarationName MSVaList = &Context.Idents.get("__builtin_ms_va_list");
@@ -310,7 +340,7 @@ bool Sema::makeUnavailableInSystemHeader(SourceLocation loc,
   if (!fn) return false;
 
   // If we're in template instantiation, it's an error.
-  if (!ActiveTemplateInstantiations.empty())
+  if (inTemplateInstantiation())
     return false;
 
   // If that function's not in a system header, it's an error.
@@ -989,7 +1019,7 @@ void Sema::EmitCurrentDiagnostic(unsigned DiagID) {
   // and yet we also use the current diag ID on the DiagnosticsEngine. This has
   // been made more painfully obvious by the refactor that introduced this
   // function, but it is possible that the incoming argument can be
-  // eliminnated. If it truly cannot be (for example, there is some reentrancy
+  // eliminated. If it truly cannot be (for example, there is some reentrancy
   // issue I am not seeing yet), then there should at least be a clarifying
   // comment somewhere.
   if (Optional<TemplateDeductionInfo *> Info = isSFINAEContext()) {
@@ -1077,13 +1107,8 @@ void Sema::EmitCurrentDiagnostic(unsigned DiagID) {
   // that is different from the last template instantiation where
   // we emitted an error, print a template instantiation
   // backtrace.
-  if (!DiagnosticIDs::isBuiltinNote(DiagID) &&
-      !ActiveTemplateInstantiations.empty() &&
-      ActiveTemplateInstantiations.back()
-        != LastTemplateInstantiationErrorContext) {
-    PrintInstantiationStack();
-    LastTemplateInstantiationErrorContext = ActiveTemplateInstantiations.back();
-  }
+  if (!DiagnosticIDs::isBuiltinNote(DiagID))
+    PrintContextStack();
 }
 
 Sema::SemaDiagnosticBuilder
@@ -1218,21 +1243,21 @@ BlockScopeInfo *Sema::getCurBlock() {
   if (CurBSI && CurBSI->TheDecl &&
       !CurBSI->TheDecl->Encloses(CurContext)) {
     // We have switched contexts due to template instantiation.
-    assert(!ActiveTemplateInstantiations.empty());
+    assert(!CodeSynthesisContexts.empty());
     return nullptr;
   }
 
   return CurBSI;
 }
 
-LambdaScopeInfo *Sema::getCurLambda(bool IgnoreCapturedRegions) {
+LambdaScopeInfo *Sema::getCurLambda(bool IgnoreNonLambdaCapturingScope) {
   if (FunctionScopes.empty())
     return nullptr;
 
   auto I = FunctionScopes.rbegin();
-  if (IgnoreCapturedRegions) {
+  if (IgnoreNonLambdaCapturingScope) {
     auto E = FunctionScopes.rend();
-    while (I != E && isa<CapturedRegionScopeInfo>(*I))
+    while (I != E && isa<CapturingScopeInfo>(*I) && !isa<LambdaScopeInfo>(*I))
       ++I;
     if (I == E)
       return nullptr;
@@ -1241,7 +1266,7 @@ LambdaScopeInfo *Sema::getCurLambda(bool IgnoreCapturedRegions) {
   if (CurLSI && CurLSI->Lambda &&
       !CurLSI->Lambda->Encloses(CurContext)) {
     // We have switched contexts due to template instantiation.
-    assert(!ActiveTemplateInstantiations.empty());
+    assert(!CodeSynthesisContexts.empty());
     return nullptr;
   }
 
@@ -1555,3 +1580,85 @@ const llvm::MapVector<FieldDecl *, DeleteLocs> &
 Sema::getMismatchingDeleteExpressions() const {
   return DeleteExprs;
 }
+
+void Sema::setOpenCLExtensionForType(QualType T, llvm::StringRef ExtStr) {
+  if (ExtStr.empty())
+    return;
+  llvm::SmallVector<StringRef, 1> Exts;
+  ExtStr.split(Exts, " ", /* limit */ -1, /* keep empty */ false);
+  auto CanT = T.getCanonicalType().getTypePtr();
+  for (auto &I : Exts)
+    OpenCLTypeExtMap[CanT].insert(I.str());
+}
+
+void Sema::setOpenCLExtensionForDecl(Decl *FD, StringRef ExtStr) {
+  llvm::SmallVector<StringRef, 1> Exts;
+  ExtStr.split(Exts, " ", /* limit */ -1, /* keep empty */ false);
+  if (Exts.empty())
+    return;
+  for (auto &I : Exts)
+    OpenCLDeclExtMap[FD].insert(I.str());
+}
+
+void Sema::setCurrentOpenCLExtensionForType(QualType T) {
+  if (CurrOpenCLExtension.empty())
+    return;
+  setOpenCLExtensionForType(T, CurrOpenCLExtension);
+}
+
+void Sema::setCurrentOpenCLExtensionForDecl(Decl *D) {
+  if (CurrOpenCLExtension.empty())
+    return;
+  setOpenCLExtensionForDecl(D, CurrOpenCLExtension);
+}
+
+bool Sema::isOpenCLDisabledDecl(Decl *FD) {
+  auto Loc = OpenCLDeclExtMap.find(FD);
+  if (Loc == OpenCLDeclExtMap.end())
+    return false;
+  for (auto &I : Loc->second) {
+    if (!getOpenCLOptions().isEnabled(I))
+      return true;
+  }
+  return false;
+}
+
+template <typename T, typename DiagLocT, typename DiagInfoT, typename MapT>
+bool Sema::checkOpenCLDisabledTypeOrDecl(T D, DiagLocT DiagLoc,
+                                         DiagInfoT DiagInfo, MapT &Map,
+                                         unsigned Selector,
+                                         SourceRange SrcRange) {
+  auto Loc = Map.find(D);
+  if (Loc == Map.end())
+    return false;
+  bool Disabled = false;
+  for (auto &I : Loc->second) {
+    if (I != CurrOpenCLExtension && !getOpenCLOptions().isEnabled(I)) {
+      Diag(DiagLoc, diag::err_opencl_requires_extension) << Selector << DiagInfo
+                                                         << I << SrcRange;
+      Disabled = true;
+    }
+  }
+  return Disabled;
+}
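+// For illustration, a sketch of how these maps drive diagnostics (assuming a
+// type was registered above with, e.g., setOpenCLExtensionForType(T,
+// "cl_khr_fp16")): if cl_khr_fp16 is not currently enabled, a declaration
+// spelled with that type reaches checkOpenCLDisabledTypeDeclSpec below and
+// emits err_opencl_requires_extension for each missing extension.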
+
+bool Sema::checkOpenCLDisabledTypeDeclSpec(const DeclSpec &DS, QualType QT) {
+  // Check extensions for declared types.
+  Decl *Decl = nullptr;
+  if (auto TypedefT = dyn_cast<TypedefType>(QT.getTypePtr()))
+    Decl = TypedefT->getDecl();
+  if (auto TagT = dyn_cast<TagType>(QT.getCanonicalType().getTypePtr()))
+    Decl = TagT->getDecl();
+  auto Loc = DS.getTypeSpecTypeLoc();
+  if (checkOpenCLDisabledTypeOrDecl(Decl, Loc, QT, OpenCLDeclExtMap))
+    return true;
+
+  // Check extensions for builtin types.
+  return checkOpenCLDisabledTypeOrDecl(QT.getCanonicalType().getTypePtr(), Loc,
+                                       QT, OpenCLTypeExtMap);
+}
+
+bool Sema::checkOpenCLDisabledDecl(const Decl &D, const Expr &E) {
+  return checkOpenCLDisabledTypeOrDecl(&D, E.getLocStart(), "",
+                                       OpenCLDeclExtMap, 1, D.getSourceRange());
+}
diff --git a/tools/clang/lib/Sema/SemaDecl.cpp b/tools/clang/lib/Sema/SemaDecl.cpp
index d36825a..378d766 100644
--- a/tools/clang/lib/Sema/SemaDecl.cpp
+++ b/tools/clang/lib/Sema/SemaDecl.cpp
@@ -67,7 +67,7 @@ class TypeNameValidatorCCC : public CorrectionCandidateCallback {
   TypeNameValidatorCCC(bool AllowInvalid, bool WantClass=false,
                        bool AllowTemplates=false)
       : AllowInvalidDecl(AllowInvalid), WantClassName(WantClass),
-        AllowClassTemplates(AllowTemplates) {
+        AllowTemplates(AllowTemplates) {
     WantExpressionKeywords = false;
     WantCXXNamedCasts = false;
     WantRemainingKeywords = false;
@@ -76,7 +76,7 @@ class TypeNameValidatorCCC : public CorrectionCandidateCallback {
   bool ValidateCandidate(const TypoCorrection &candidate) override {
     if (NamedDecl *ND = candidate.getCorrectionDecl()) {
       bool IsType = isa<TypeDecl>(ND) || isa<ObjCInterfaceDecl>(ND);
-      bool AllowedTemplate = AllowClassTemplates && isa<ClassTemplateDecl>(ND);
+      bool AllowedTemplate = AllowTemplates && getAsTypeTemplateDecl(ND);
       return (IsType || AllowedTemplate) &&
              (AllowInvalidDecl || !ND->isInvalidDecl());
     }
@@ -86,7 +86,7 @@ class TypeNameValidatorCCC : public CorrectionCandidateCallback {
  private:
   bool AllowInvalidDecl;
   bool WantClassName;
-  bool AllowClassTemplates;
+  bool AllowTemplates;
 };
 
 } // end anonymous namespace
@@ -253,7 +253,13 @@ ParsedType Sema::getTypeName(const IdentifierInfo &II, SourceLocation NameLoc,
                              ParsedType ObjectTypePtr,
                              bool IsCtorOrDtorName,
                              bool WantNontrivialTypeSourceInfo,
+                             bool IsClassTemplateDeductionContext,
                              IdentifierInfo **CorrectedII) {
+  // FIXME: Consider allowing this outside C++1z mode as an extension.
+  bool AllowDeducedTemplate = IsClassTemplateDeductionContext &&
+                              getLangOpts().CPlusPlus1z && !IsCtorOrDtorName &&
+                              !isClassName && !HasTrailingDot;
+
   // Determine where we will perform name lookup.
   DeclContext *LookupCtx = nullptr;
   if (ObjectTypePtr) {
@@ -335,10 +341,11 @@ ParsedType Sema::getTypeName(const IdentifierInfo &II, SourceLocation NameLoc,
   case LookupResult::NotFound:
   case LookupResult::NotFoundInCurrentInstantiation:
     if (CorrectedII) {
-      TypoCorrection Correction = CorrectTypo(
-          Result.getLookupNameInfo(), Kind, S, SS,
-          llvm::make_unique<TypeNameValidatorCCC>(true, isClassName),
-          CTK_ErrorRecovery);
+      TypoCorrection Correction =
+          CorrectTypo(Result.getLookupNameInfo(), Kind, S, SS,
+                      llvm::make_unique<TypeNameValidatorCCC>(
+                          true, isClassName, AllowDeducedTemplate),
+                      CTK_ErrorRecovery);
       IdentifierInfo *NewII = Correction.getCorrectionAsIdentifierInfo();
       TemplateTy Template;
       bool MemberOfUnknownSpecialization;
@@ -360,7 +367,8 @@ ParsedType Sema::getTypeName(const IdentifierInfo &II, SourceLocation NameLoc,
         ParsedType Ty = getTypeName(*NewII, NameLoc, S, NewSSPtr,
                                     isClassName, HasTrailingDot, ObjectTypePtr,
                                     IsCtorOrDtorName,
-                                    WantNontrivialTypeSourceInfo);
+                                    WantNontrivialTypeSourceInfo,
+                                    IsClassTemplateDeductionContext);
         if (Ty) {
           diagnoseTypo(Correction,
                        PDiag(diag::err_unknown_type_or_class_name_suggest)
@@ -392,7 +400,8 @@ ParsedType Sema::getTypeName(const IdentifierInfo &II, SourceLocation NameLoc,
 
   // Look to see if we have a type anywhere in the list of results.
   for (LookupResult::iterator Res = Result.begin(), ResEnd = Result.end();
        Res != ResEnd; ++Res) {
-    if (isa<TypeDecl>(*Res) || isa<ObjCInterfaceDecl>(*Res)) {
+    if (isa<TypeDecl>(*Res) || isa<ObjCInterfaceDecl>(*Res) ||
+        (AllowDeducedTemplate && getAsTypeTemplateDecl(*Res))) {
       if (!IIDecl ||
           (*Res)->getLocation().getRawEncoding() <
             IIDecl->getLocation().getRawEncoding())
@@ -426,33 +435,29 @@ ParsedType Sema::getTypeName(const IdentifierInfo &II, SourceLocation NameLoc,
 
   QualType T;
   if (TypeDecl *TD = dyn_cast<TypeDecl>(IIDecl)) {
+    // C++ [class.qual]p2: A lookup that would find the injected-class-name
+    // instead names the constructors of the class, except when naming a class.
+    // This is ill-formed when we're not actually forming a ctor or dtor name.
+    auto *LookupRD = dyn_cast_or_null<CXXRecordDecl>(LookupCtx);
+    auto *FoundRD = dyn_cast<CXXRecordDecl>(TD);
+    if (!isClassName && !IsCtorOrDtorName && LookupRD && FoundRD &&
+        FoundRD->isInjectedClassName() &&
+        declaresSameEntity(LookupRD, cast<CXXRecordDecl>(FoundRD->getParent())))
+      Diag(NameLoc, diag::err_out_of_line_qualified_id_type_names_constructor)
+          << &II << /*Type*/1;
+
     DiagnoseUseOfDecl(IIDecl, NameLoc);
 
     T = Context.getTypeDeclType(TD);
     MarkAnyDeclReferenced(TD->getLocation(), TD, /*OdrUse=*/false);
-
-    // NOTE: avoid constructing an ElaboratedType(Loc) if this is a
-    // constructor or destructor name (in such a case, the scope specifier
-    // will be attached to the enclosing Expr or Decl node).
-    if (SS && SS->isNotEmpty() && !IsCtorOrDtorName) {
-      if (WantNontrivialTypeSourceInfo) {
-        // Construct a type with type-source information.
-        TypeLocBuilder Builder;
-        Builder.pushTypeSpec(T).setNameLoc(NameLoc);
-
-        T = getElaboratedType(ETK_None, *SS, T);
-        ElaboratedTypeLoc ElabTL = Builder.push<ElaboratedTypeLoc>(T);
-        ElabTL.setElaboratedKeywordLoc(SourceLocation());
-        ElabTL.setQualifierLoc(SS->getWithLocInContext(Context));
-        return CreateParsedType(T, Builder.getTypeSourceInfo(Context, T));
-      } else {
-        T = getElaboratedType(ETK_None, *SS, T);
-      }
-    }
   } else if (ObjCInterfaceDecl *IDecl = dyn_cast<ObjCInterfaceDecl>(IIDecl)) {
     (void)DiagnoseUseOfDecl(IDecl, NameLoc);
     if (!HasTrailingDot)
       T = Context.getObjCInterfaceType(IDecl);
+  } else if (AllowDeducedTemplate) {
+    if (auto *TD = getAsTypeTemplateDecl(IIDecl))
+      T = Context.getDeducedTemplateSpecializationType(TemplateName(TD),
+                                                       QualType(), false);
   }
 
   if (T.isNull()) {
@@ -460,6 +465,27 @@ ParsedType Sema::getTypeName(const IdentifierInfo &II, SourceLocation NameLoc,
     Result.suppressDiagnostics();
     return nullptr;
   }
+
+  // NOTE: avoid constructing an ElaboratedType(Loc) if this is a
+  // constructor or destructor name (in such a case, the scope specifier
+  // will be attached to the enclosing Expr or Decl node).
+  if (SS && SS->isNotEmpty() && !IsCtorOrDtorName &&
+      !isa<ObjCInterfaceDecl>(IIDecl)) {
+    if (WantNontrivialTypeSourceInfo) {
+      // Construct a type with type-source information.
+      TypeLocBuilder Builder;
+      Builder.pushTypeSpec(T).setNameLoc(NameLoc);
+
+      T = getElaboratedType(ETK_None, *SS, T);
+      ElaboratedTypeLoc ElabTL = Builder.push<ElaboratedTypeLoc>(T);
+      ElabTL.setElaboratedKeywordLoc(SourceLocation());
+      ElabTL.setQualifierLoc(SS->getWithLocInContext(Context));
+      return CreateParsedType(T, Builder.getTypeSourceInfo(Context, T));
+    } else {
+      T = getElaboratedType(ETK_None, *SS, T);
+    }
+  }
+
   return ParsedType::make(T);
 }
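// For illustration (assuming -std=c++1z), the deduced type built above is
// what lets a declaration omit its template arguments:
//   template <typename T> struct Pair { Pair(T, T); };
//   Pair p(1, 2);   // forms a DeducedTemplateSpecializationType; deduces
//                   // Pair<int> during initialization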
@@ -637,6 +663,7 @@ void Sema::DiagnoseUnknownTypeName(IdentifierInfo *&II,
       if (Corrected.getCorrectionSpecifier())
         tmpSS.MakeTrivial(Context, Corrected.getCorrectionSpecifier(),
                           SourceRange(IILoc));
+      // FIXME: Support class template argument deduction here.
       SuggestedType = getTypeName(*Corrected.getCorrectionAsIdentifierInfo(),
                                   IILoc, S, tmpSS.isSet() ? &tmpSS : SS,
                                   false, false, nullptr,
@@ -657,7 +684,8 @@ void Sema::DiagnoseUnknownTypeName(IdentifierInfo *&II,
                          Name, nullptr, true, TemplateResult,
                          MemberOfUnknownSpecialization) == TNK_Type_template) {
       TemplateName TplName = TemplateResult.get();
-      Diag(IILoc, diag::err_template_missing_args) << TplName;
+      Diag(IILoc, diag::err_template_missing_args)
+          << (int)getTemplateNameKindForDiagnostics(TplName) << TplName;
       if (TemplateDecl *TplDecl = TplName.getAsTemplateDecl()) {
         Diag(TplDecl->getLocation(), diag::note_template_decl_here)
             << TplDecl->getTemplateParameters()->getSourceRange();
@@ -783,6 +811,13 @@ Sema::ClassifyName(Scope *S, CXXScopeSpec &SS, IdentifierInfo *&Name,
   if (NextToken.is(tok::coloncolon)) {
     NestedNameSpecInfo IdInfo(Name, NameLoc, NextToken.getLocation());
     BuildCXXNestedNameSpecifier(S, IdInfo, false, SS, nullptr, false);
+  } else if (getLangOpts().CPlusPlus && SS.isSet() &&
+             isCurrentClassName(*Name, S, &SS)) {
+    // Per [class.qual]p2, this names the constructors of SS, not the
+    // injected-class-name. We don't have a classification for that.
+    // There's not much point caching this result, since the parser
+    // will reject it later.
+    return NameClassification::Unknown();
   }
 
   LookupResult Result(*this, Name, NameLoc, LookupOrdinaryName);
@@ -1045,7 +1080,8 @@ Sema::ClassifyName(Scope *S, CXXScopeSpec &SS, IdentifierInfo *&Name,
   }
 
   // We can have a type template here if we're classifying a template argument.
-  if (isa<TemplateDecl>(FirstDecl) && !isa<FunctionTemplateDecl>(FirstDecl))
+  if (isa<TemplateDecl>(FirstDecl) && !isa<FunctionTemplateDecl>(FirstDecl) &&
+      !isa<VarTemplateDecl>(FirstDecl))
     return NameClassification::TypeTemplate(
         TemplateName(cast<TemplateDecl>(FirstDecl)));
 
@@ -1072,6 +1108,24 @@ Sema::ClassifyName(Scope *S, CXXScopeSpec &SS, IdentifierInfo *&Name,
   return BuildDeclarationNameExpr(SS, Result, ADL);
 }
 
+Sema::TemplateNameKindForDiagnostics
+Sema::getTemplateNameKindForDiagnostics(TemplateName Name) {
+  auto *TD = Name.getAsTemplateDecl();
+  if (!TD)
+    return TemplateNameKindForDiagnostics::DependentTemplate;
+  if (isa<ClassTemplateDecl>(TD))
+    return TemplateNameKindForDiagnostics::ClassTemplate;
+  if (isa<FunctionTemplateDecl>(TD))
+    return TemplateNameKindForDiagnostics::FunctionTemplate;
+  if (isa<VarTemplateDecl>(TD))
+    return TemplateNameKindForDiagnostics::VarTemplate;
+  if (isa<TypeAliasTemplateDecl>(TD))
+    return TemplateNameKindForDiagnostics::AliasTemplate;
+  if (isa<TemplateTemplateParmDecl>(TD))
+    return TemplateNameKindForDiagnostics::TemplateTemplateParam;
+  return TemplateNameKindForDiagnostics::DependentTemplate;
+}
+
 // Determines the context to return to after temporarily entering a
 // context.  This depends in an unnecessarily complicated way on the
 // exact ordering of callbacks from the parser.
@@ -3035,7 +3089,7 @@ bool Sema::MergeFunctionDecl(FunctionDecl *New, NamedDecl *&OldD,
       //   [...] A member shall not be declared twice in the
       //   member-specification, except that a nested class or member
       //   class template can be declared and then later defined.
- if (ActiveTemplateInstantiations.empty()) { + if (!inTemplateInstantiation()) { unsigned NewDiag; if (isa(OldMethod)) NewDiag = diag::err_constructor_redeclared; @@ -4504,7 +4558,7 @@ Decl *Sema::BuildAnonymousStructOrUnion(Scope *S, DeclSpec &DS, // trivial in almost all cases, except if a union member has an in-class // initializer: // union { int n = 0; }; - ActOnUninitializedDecl(Anon, /*TypeMayContainAuto=*/false); + ActOnUninitializedDecl(Anon); } Anon->setImplicit(); @@ -4622,6 +4676,34 @@ Sema::GetNameFromUnqualifiedId(const UnqualifiedId &Name) { NameInfo.setLoc(Name.StartLocation); return NameInfo; + case UnqualifiedId::IK_DeductionGuideName: { + // C++ [temp.deduct.guide]p3: + // The simple-template-id shall name a class template specialization. + // The template-name shall be the same identifier as the template-name + // of the simple-template-id. + // These together intend to imply that the template-name shall name a + // class template. + // FIXME: template struct X {}; + // template using Y = X; + // Y(int) -> Y; + // satisfies these rules but does not name a class template. + TemplateName TN = Name.TemplateName.get().get(); + auto *Template = TN.getAsTemplateDecl(); + if (!Template || !isa(Template)) { + Diag(Name.StartLocation, + diag::err_deduction_guide_name_not_class_template) + << (int)getTemplateNameKindForDiagnostics(TN) << TN; + if (Template) + Diag(Template->getLocation(), diag::note_template_decl_here); + return DeclarationNameInfo(); + } + + NameInfo.setName( + Context.DeclarationNames.getCXXDeductionGuideName(Template)); + NameInfo.setLoc(Name.StartLocation); + return NameInfo; + } + case UnqualifiedId::IK_OperatorFunctionId: NameInfo.setName(Context.DeclarationNames.getCXXOperatorName( Name.OperatorFunctionId.Operator)); @@ -4839,6 +4921,9 @@ Decl *Sema::ActOnDeclarator(Scope *S, Declarator &D) { Dcl && Dcl->getDeclContext()->isFileContext()) Dcl->setTopLevelDeclInObjCContainer(); + if (getLangOpts().OpenCL) + setCurrentOpenCLExtensionForDecl(Dcl); + return Dcl; } @@ -5379,8 +5464,13 @@ Sema::ActOnTypedefDeclarator(Scope* S, Declarator& D, DeclContext* DC, diag::err_concept_wrong_decl_kind); if (D.getName().Kind != UnqualifiedId::IK_Identifier) { - Diag(D.getName().StartLocation, diag::err_typedef_not_identifier) - << D.getName().getSourceRange(); + if (D.getName().Kind == UnqualifiedId::IK_DeductionGuideName) + Diag(D.getName().StartLocation, + diag::err_deduction_guide_invalid_specifier) + << "typedef"; + else + Diag(D.getName().StartLocation, diag::err_typedef_not_identifier) + << D.getName().getSourceRange(); return nullptr; } @@ -5646,13 +5736,17 @@ static void checkDLLAttributeRedeclaration(Sema &S, NamedDecl *OldDecl, if (OldDecl->isInvalidDecl()) return; + bool IsTemplate = false; if (TemplateDecl *OldTD = dyn_cast(OldDecl)) { OldDecl = OldTD->getTemplatedDecl(); + IsTemplate = true; if (!IsSpecialization) IsDefinition = false; } - if (TemplateDecl *NewTD = dyn_cast(NewDecl)) + if (TemplateDecl *NewTD = dyn_cast(NewDecl)) { NewDecl = NewTD->getTemplatedDecl(); + IsTemplate = true; + } if (!OldDecl || !NewDecl) return; @@ -5705,9 +5799,10 @@ static void checkDLLAttributeRedeclaration(Sema &S, NamedDecl *OldDecl, } // A redeclaration is not allowed to drop a dllimport attribute, the only - // exceptions being inline function definitions, local extern declarations, - // qualified friend declarations or special MSVC extension: in the last case, - // the declaration is treated as if it were marked dllexport. 
+ // exceptions being inline function definitions (except for function + // templates), local extern declarations, qualified friend declarations or + // special MSVC extension: in the last case, the declaration is treated as if + // it were marked dllexport. bool IsInline = false, IsStaticDataMember = false, IsQualifiedFriend = false; bool IsMicrosoft = S.Context.getTargetInfo().getCXXABI().isMicrosoft(); if (const auto *VD = dyn_cast(NewDecl)) { @@ -5722,7 +5817,8 @@ static void checkDLLAttributeRedeclaration(Sema &S, NamedDecl *OldDecl, FD->getFriendObjectKind() == Decl::FOK_Declared; } - if (OldImportAttr && !HasNewAttr && !IsInline && !IsStaticDataMember && + if (OldImportAttr && !HasNewAttr && + (!IsInline || (IsMicrosoft && IsTemplate)) && !IsStaticDataMember && !NewDecl->isLocalExternDecl() && !IsQualifiedFriend) { if (IsMicrosoft && IsDefinition) { S.Diag(NewDecl->getLocation(), @@ -5899,8 +5995,7 @@ NamedDecl *Sema::ActOnVariableDeclarator( Name = II; } } else if (!II) { - Diag(D.getIdentifierLoc(), diag::err_bad_variable_name) - << Name; + Diag(D.getIdentifierLoc(), diag::err_bad_variable_name) << Name; return nullptr; } @@ -5940,7 +6035,7 @@ NamedDecl *Sema::ActOnVariableDeclarator( NR = NR->getPointeeType(); } - if (!getOpenCLOptions().cl_khr_fp16) { + if (!getOpenCLOptions().isEnabled("cl_khr_fp16")) { // OpenCL v1.2 s6.1.1.1: reject declaring variables of the half and // half array type (unless the cl_khr_fp16 extension is enabled). if (Context.getBaseElementType(R)->isHalfType()) { @@ -6014,7 +6109,7 @@ NamedDecl *Sema::ActOnVariableDeclarator( } } - bool IsExplicitSpecialization = false; + bool IsMemberSpecialization = false; bool IsVariableTemplateSpecialization = false; bool IsPartialSpecialization = false; bool IsVariableTemplate = false; @@ -6026,7 +6121,7 @@ NamedDecl *Sema::ActOnVariableDeclarator( D.getIdentifierLoc(), II, R, TInfo, SC); - if (D.getDeclSpec().containsPlaceholderType() && R->getContainedAutoType()) + if (R->getContainedDeducedType()) ParsingInitForAutoVars.insert(NewVD); if (D.isInvalidType()) @@ -6092,7 +6187,7 @@ NamedDecl *Sema::ActOnVariableDeclarator( ? D.getName().TemplateId : nullptr, TemplateParamLists, - /*never a friend*/ false, IsExplicitSpecialization, Invalid); + /*never a friend*/ false, IsMemberSpecialization, Invalid); if (TemplateParams) { if (!TemplateParams->size() && @@ -6162,7 +6257,7 @@ NamedDecl *Sema::ActOnVariableDeclarator( // If this decl has an auto type in need of deduction, make a note of the // Decl so we can diagnose uses of it in its own initializer. - if (D.getDeclSpec().containsPlaceholderType() && R->getContainedAutoType()) + if (R->getContainedDeducedType()) ParsingInitForAutoVars.insert(NewVD); if (D.isInvalidType() || Invalid) { @@ -6318,7 +6413,7 @@ NamedDecl *Sema::ActOnVariableDeclarator( << (IsPartialSpecialization ? 1 : 0) << FixItHint::CreateRemoval( D.getDeclSpec().getModulePrivateSpecLoc()); - else if (IsExplicitSpecialization) + else if (IsMemberSpecialization) Diag(NewVD->getLocation(), diag::err_module_private_specialization) << 2 << FixItHint::CreateRemoval(D.getDeclSpec().getModulePrivateSpecLoc()); @@ -6423,16 +6518,17 @@ NamedDecl *Sema::ActOnVariableDeclarator( } } - // Diagnose shadowed variables before filtering for scope. - if (D.getCXXScopeSpec().isEmpty()) - CheckShadow(S, NewVD, Previous); + // Find the shadowed declaration before filtering for scope. + NamedDecl *ShadowedDecl = D.getCXXScopeSpec().isEmpty() + ? 
getShadowedDeclaration(NewVD, Previous) + : nullptr; // Don't consider existing declarations that are in a different // scope and are out-of-semantic-context declarations (if the new // declaration has linkage). FilterLookupForScope(Previous, OriginalDC, S, shouldConsiderLinkage(NewVD), D.getCXXScopeSpec().isNotEmpty() || - IsExplicitSpecialization || + IsMemberSpecialization || IsVariableTemplateSpecialization); // Check whether the previous declaration is in the same block scope. This @@ -6447,7 +6543,7 @@ NamedDecl *Sema::ActOnVariableDeclarator( D.setRedeclaration(CheckVariableDeclaration(NewVD, Previous)); } else { // If this is an explicit specialization of a static data member, check it. - if (IsExplicitSpecialization && !NewVD->isInvalidDecl() && + if (IsMemberSpecialization && !NewVD->isInvalidDecl() && CheckMemberSpecialization(NewVD, Previous)) NewVD->setInvalidDecl(); @@ -6520,6 +6616,10 @@ NamedDecl *Sema::ActOnVariableDeclarator( } } + // Diagnose shadowed variables iff this isn't a redeclaration. + if (ShadowedDecl && !D.isRedeclaration()) + CheckShadow(NewVD, ShadowedDecl, Previous); + ProcessPragmaWeak(S, NewVD); // If this is the first declaration of an extern C variable, update @@ -6558,7 +6658,7 @@ NamedDecl *Sema::ActOnVariableDeclarator( if (D.isRedeclaration() && !Previous.empty()) { checkDLLAttributeRedeclaration( *this, dyn_cast(Previous.getRepresentativeDecl()), NewVD, - IsExplicitSpecialization, D.isFunctionDefinition()); + IsMemberSpecialization, D.isFunctionDefinition()); } if (NewTemplate) { @@ -6593,33 +6693,40 @@ static SourceLocation getCaptureLocation(const LambdaScopeInfo *LSI, return SourceLocation(); } -/// \brief Diagnose variable or built-in function shadowing. Implements -/// -Wshadow. -/// -/// This method is called whenever a VarDecl is added to a "useful" -/// scope. -/// -/// \param S the scope in which the shadowing name is being declared -/// \param R the lookup of the name -/// -void Sema::CheckShadow(Scope *S, VarDecl *D, const LookupResult& R) { +/// \brief Return the declaration shadowed by the given variable \p D, or null +/// if it doesn't shadow any declaration or shadowing warnings are disabled. +NamedDecl *Sema::getShadowedDeclaration(const VarDecl *D, + const LookupResult &R) { // Return if warning is ignored. if (Diags.isIgnored(diag::warn_decl_shadow, R.getNameLoc())) - return; + return nullptr; // Don't diagnose declarations at file scope. if (D->hasGlobalStorage()) - return; - - DeclContext *NewDC = D->getDeclContext(); + return nullptr; // Only diagnose if we're shadowing an unambiguous field or variable. if (R.getResultKind() != LookupResult::Found) - return; + return nullptr; - NamedDecl* ShadowedDecl = R.getFoundDecl(); - if (!isa(ShadowedDecl) && !isa(ShadowedDecl)) - return; + NamedDecl *ShadowedDecl = R.getFoundDecl(); + return isa(ShadowedDecl) || isa(ShadowedDecl) + ? ShadowedDecl + : nullptr; +} + +/// \brief Diagnose variable or built-in function shadowing. Implements +/// -Wshadow. +/// +/// This method is called whenever a VarDecl is added to a "useful" +/// scope. +/// +/// \param ShadowedDecl the declaration that is shadowed by the given variable +/// \param R the lookup of the name +/// +void Sema::CheckShadow(VarDecl *D, NamedDecl *ShadowedDecl, + const LookupResult &R) { + DeclContext *NewDC = D->getDeclContext(); if (FieldDecl *FD = dyn_cast(ShadowedDecl)) { // Fields are not shadowed by variables in C++ static methods. 
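// Illustrative sketch (not part of the patch): splitting
// getShadowedDeclaration out of CheckShadow lets the caller suppress the
// warning when the new declaration merely redeclares the found one:
//
//   int v;
//   void f() {
//     int v = 0;     // -Wshadow: declaration shadows ::v
//   }
//   void g() {
//     extern int v;  // redeclares ::v; no shadow warning is emitted
//   }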
@@ -6730,7 +6837,8 @@ void Sema::CheckShadow(Scope *S, VarDecl *D) { LookupResult R(*this, D->getDeclName(), D->getLocation(), Sema::LookupOrdinaryName, Sema::ForRedeclaration); LookupName(R, S); - CheckShadow(S, D, R); + if (NamedDecl *ShadowedDecl = getShadowedDeclaration(D, R)) + CheckShadow(D, ShadowedDecl, R); } /// Check if 'E', which is an expression that is about to be modified, refers @@ -6910,7 +7018,7 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) { // OpenCL v1.2 s6.8 - The static qualifier is valid only in program // scope. if (getLangOpts().OpenCLVersion == 120 && - !getOpenCLOptions().cl_clang_storage_class_specifiers && + !getOpenCLOptions().isEnabled("cl_clang_storage_class_specifiers") && NewVD->isStaticLocal()) { Diag(NewVD->getLocation(), diag::err_static_function_scope); NewVD->setInvalidDecl(); @@ -7414,6 +7522,7 @@ static StorageClass getFunctionStorageClass(Sema &SemaRef, Declarator &D) { case DeclSpec::SCS_mutable: SemaRef.Diag(D.getDeclSpec().getStorageClassSpecLoc(), diag::err_typecheck_sclass_func); + D.getMutableDeclSpec().ClearStorageClassSpecs(); D.setInvalidType(); break; case DeclSpec::SCS_unspecified: break; @@ -7456,11 +7565,12 @@ static FunctionDecl* CreateNewFunctionDecl(Sema &SemaRef, Declarator &D, // Determine whether the function was written with a // prototype. This true when: // - there is a prototype in the declarator, or - // - the type R of the function is some kind of typedef or other reference - // to a type name (which eventually refers to a function type). + // - the type R of the function is some kind of typedef or other non- + // attributed reference to a type name (which eventually refers to a + // function type). bool HasPrototype = (D.isFunctionDeclarator() && D.getFunctionTypeInfo().hasPrototype) || - (!isa(R.getTypePtr()) && R->isFunctionProtoType()); + (!R->getAsAdjusted() && R->isFunctionProtoType()); NewFD = FunctionDecl::Create(SemaRef.Context, DC, D.getLocStart(), NameInfo, R, @@ -7546,6 +7656,12 @@ static FunctionDecl* CreateNewFunctionDecl(Sema &SemaRef, Declarator &D, R, TInfo, isInline, isExplicit, isConstexpr, SourceLocation()); + } else if (Name.getNameKind() == DeclarationName::CXXDeductionGuideName) { + SemaRef.CheckDeductionGuideDeclarator(D, R, SC); + + return CXXDeductionGuideDecl::Create(SemaRef.Context, DC, D.getLocStart(), + isExplicit, NameInfo, R, TInfo, + D.getLocEnd()); } else if (DC->isRecord()) { // If the name of the function is the same as the name of the record, // then this must be an invalid constructor that has a return type. @@ -7618,7 +7734,7 @@ static OpenCLParamType getOpenCLKernelParameterType(Sema &S, QualType PT) { // OpenCL extension spec v1.2 s9.5: // This extension adds support for half scalar and vector types as built-in // types that can be used for arithmetic operations, conversions etc. 
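// Illustrative sketch (not part of the patch): the CXXDeductionGuideDecl
// creation path above accepts declarations such as:
//
//   template<typename T> struct A { A(T); };
//   template<typename T> A(T) -> A<T>;   // deduction guide for A
//   explicit A(const char*) -> A<bool>;  // explicit guides are permitted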
- if (!S.getOpenCLOptions().cl_khr_fp16 && PT->isHalfType()) + if (!S.getOpenCLOptions().isEnabled("cl_khr_fp16") && PT->isHalfType()) return InvalidKernelParam; if (PT->isRecordType()) @@ -7819,7 +7935,7 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, bool isFriend = false; FunctionTemplateDecl *FunctionTemplate = nullptr; - bool isExplicitSpecialization = false; + bool isMemberSpecialization = false; bool isFunctionTemplateSpecialization = false; bool isDependentClassScopeExplicitSpecialization = false; @@ -7875,7 +7991,7 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, } SetNestedNameSpecifier(NewFD, D); - isExplicitSpecialization = false; + isMemberSpecialization = false; isFunctionTemplateSpecialization = false; if (D.isInvalidType()) NewFD->setInvalidDecl(); @@ -7890,7 +8006,7 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, D.getName().getKind() == UnqualifiedId::IK_TemplateId ? D.getName().TemplateId : nullptr, - TemplateParamLists, isFriend, isExplicitSpecialization, + TemplateParamLists, isFriend, isMemberSpecialization, Invalid)) { if (TemplateParams->size() > 0) { // This is a function template @@ -8034,7 +8150,8 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, // The explicit specifier shall be used only in the declaration of a // constructor or conversion function within its class definition; // see 12.3.1 and 12.3.2. - if (isExplicit && !NewFD->isInvalidDecl()) { + if (isExplicit && !NewFD->isInvalidDecl() && + !isa(NewFD)) { if (!CurContext->isRecord()) { // 'explicit' was specified outside of the class. Diag(D.getDeclSpec().getExplicitSpecLoc(), @@ -8223,7 +8340,7 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, // Filter out previous declarations that don't match the scope. FilterLookupForScope(Previous, OriginalDC, S, shouldConsiderLinkage(NewFD), D.getCXXScopeSpec().isNotEmpty() || - isExplicitSpecialization || + isMemberSpecialization || isFunctionTemplateSpecialization); // Handle GNU asm-label extension (encoded as an attribute). @@ -8373,7 +8490,6 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, if (!getLangOpts().CPlusPlus) { // Perform semantic checking on the function declaration. - bool isExplicitSpecialization=false; if (!NewFD->isInvalidDecl() && NewFD->isMain()) CheckMain(NewFD, D.getDeclSpec()); @@ -8382,7 +8498,7 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, if (!NewFD->isInvalidDecl()) D.setRedeclaration(CheckFunctionDeclaration(S, NewFD, Previous, - isExplicitSpecialization)); + isMemberSpecialization)); else if (!Previous.empty()) // Recover gracefully from an invalid redeclaration. D.setRedeclaration(true); @@ -8517,7 +8633,7 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, << FixItHint::CreateRemoval( D.getDeclSpec().getStorageClassSpecLoc()); } - } else if (isExplicitSpecialization && isa(NewFD)) { + } else if (isMemberSpecialization && isa(NewFD)) { if (CheckMemberSpecialization(NewFD, Previous)) NewFD->setInvalidDecl(); } @@ -8532,7 +8648,7 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, if (!NewFD->isInvalidDecl()) D.setRedeclaration(CheckFunctionDeclaration(S, NewFD, Previous, - isExplicitSpecialization)); + isMemberSpecialization)); else if (!Previous.empty()) // Recover gracefully from an invalid redeclaration. 
D.setRedeclaration(true); @@ -8638,7 +8754,7 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, } else if (!D.isFunctionDefinition() && isa(NewFD) && NewFD->isOutOfLine() && !isFriend && !isFunctionTemplateSpecialization && - !isExplicitSpecialization) { + !isMemberSpecialization) { // An out-of-line member function declaration must also be a // definition (C++ [class.mfct]p2). // Note that this is not the case for explicit specializations of @@ -8699,7 +8815,7 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, if (D.isRedeclaration() && !Previous.empty()) { checkDLLAttributeRedeclaration( *this, dyn_cast(Previous.getRepresentativeDecl()), NewFD, - isExplicitSpecialization || isFunctionTemplateSpecialization, + isMemberSpecialization || isFunctionTemplateSpecialization, D.isFunctionDefinition()); } @@ -8824,15 +8940,16 @@ bool Sema::shouldLinkDependentDeclWithPrevious(Decl *D, Decl *PrevDecl) { /// that have been instantiated via C++ template instantiation (called /// via InstantiateDecl). /// -/// \param IsExplicitSpecialization whether this new function declaration is -/// an explicit specialization of the previous declaration. +/// \param IsMemberSpecialization whether this new function declaration is +/// a member specialization (that replaces any definition provided by the +/// previous declaration). /// /// This sets NewFD->isInvalidDecl() to true if there was an error. /// /// \returns true if the function declaration is a redeclaration. bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD, LookupResult &Previous, - bool IsExplicitSpecialization) { + bool IsMemberSpecialization) { assert(!NewFD->getReturnType()->isVariablyModifiedType() && "Variably modified return types are not handled here"); @@ -8945,7 +9062,7 @@ bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD, // Warn that we did this, if we're not performing template instantiation. // In that case, we'll have warned already when the template was defined. - if (ActiveTemplateInstantiations.empty()) { + if (!inTemplateInstantiation()) { SourceLocation AddConstLoc; if (FunctionTypeLoc FTL = MD->getTypeSourceInfo()->getTypeLoc() .IgnoreParens().getAs()) @@ -8982,7 +9099,7 @@ bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD, // If this is an explicit specialization of a member that is a function // template, mark it as a member specialization. - if (IsExplicitSpecialization && + if (IsMemberSpecialization && NewTemplateDecl->getInstantiatedFromMemberTemplate()) { NewTemplateDecl->setMemberSpecialization(); assert(OldTemplateDecl->isMemberSpecialization()); @@ -9032,6 +9149,15 @@ bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD, } else if (CXXConversionDecl *Conversion = dyn_cast(NewFD)) { ActOnConversionDeclarator(Conversion); + } else if (auto *Guide = dyn_cast(NewFD)) { + if (auto *TD = Guide->getDescribedFunctionTemplate()) + CheckDeductionGuideTemplate(TD); + + // A deduction guide is not on the list of entities that can be + // explicitly specialized. + if (Guide->getTemplateSpecializationKind() == TSK_ExplicitSpecialization) + Diag(Guide->getLocStart(), diag::err_deduction_guide_specialized) + << /*explicit specialization*/ 1; } // Find any virtual functions that this function overrides. 
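// Illustrative sketch (not part of the patch, and the exact trigger syntax
// is an assumption): err_deduction_guide_specialized rejects attempts like:
//
//   template<typename T> struct A { A(T); };
//   template<typename T> A(T) -> A<T>;
//   template<> A(int) -> A<int>;  // error: deduction guides cannot be
//                                 // explicitly specialized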
@@ -9687,11 +9813,36 @@ QualType Sema::deduceVarTypeFromInitializer(VarDecl *VDecl, VarDeclOrName VN{VDecl, Name}; - ArrayRef DeduceInits = Init; + DeducedType *Deduced = Type->getContainedDeducedType(); + assert(Deduced && "deduceVarTypeFromInitializer for non-deduced type"); + + // C++11 [dcl.spec.auto]p3 + if (!Init) { + assert(VDecl && "no init for init capture deduction?"); + Diag(VDecl->getLocation(), diag::err_auto_var_requires_init) + << VDecl->getDeclName() << Type; + return QualType(); + } + + ArrayRef DeduceInits = Init; if (DirectInit) { - if (auto *PL = dyn_cast(Init)) + if (auto *PL = dyn_cast_or_null(Init)) DeduceInits = PL->exprs(); - else if (auto *IL = dyn_cast(Init)) + } + + if (isa(Deduced)) { + assert(VDecl && "non-auto type for init capture deduction?"); + InitializedEntity Entity = InitializedEntity::InitializeVariable(VDecl); + InitializationKind Kind = InitializationKind::CreateForInit( + VDecl->getLocation(), DirectInit, Init); + // FIXME: Initialization should not be taking a mutable list of inits. + SmallVector InitsCopy(DeduceInits.begin(), DeduceInits.end()); + return DeduceTemplateSpecializationFromInitializer(TSI, Entity, Kind, + InitsCopy); + } + + if (DirectInit) { + if (auto *IL = dyn_cast(Init)) DeduceInits = IL->inits(); } @@ -9768,8 +9919,8 @@ QualType Sema::deduceVarTypeFromInitializer(VarDecl *VDecl, // checks. // We only want to warn outside of template instantiations, though: // inside a template, the 'id' could have come from a parameter. - if (ActiveTemplateInstantiations.empty() && !DefaultedAnyToId && - !IsInitCapture && !DeducedType.isNull() && DeducedType->isObjCIdType()) { + if (!inTemplateInstantiation() && !DefaultedAnyToId && !IsInitCapture && + !DeducedType.isNull() && DeducedType->isObjCIdType()) { SourceLocation Loc = TSI->getTypeLoc().getBeginLoc(); Diag(Loc, diag::warn_auto_var_is_id) << VN << Range; } @@ -9777,11 +9928,40 @@ QualType Sema::deduceVarTypeFromInitializer(VarDecl *VDecl, return DeducedType; } +bool Sema::DeduceVariableDeclarationType(VarDecl *VDecl, bool DirectInit, + Expr *Init) { + QualType DeducedType = deduceVarTypeFromInitializer( + VDecl, VDecl->getDeclName(), VDecl->getType(), VDecl->getTypeSourceInfo(), + VDecl->getSourceRange(), DirectInit, Init); + if (DeducedType.isNull()) { + VDecl->setInvalidDecl(); + return true; + } + + VDecl->setType(DeducedType); + assert(VDecl->isLinkageValid()); + + // In ARC, infer lifetime. + if (getLangOpts().ObjCAutoRefCount && inferObjCARCLifetime(VDecl)) + VDecl->setInvalidDecl(); + + // If this is a redeclaration, check that the type we just deduced matches + // the previously declared type. + if (VarDecl *Old = VDecl->getPreviousDecl()) { + // We never need to merge the type, because we cannot form an incomplete + // array of auto, nor deduce such a type. + MergeVarDeclTypes(VDecl, Old, /*MergeTypeWithPrevious*/ false); + } + + // Check the deduced type is valid for a variable declaration. + CheckVariableDeclarationType(VDecl); + return VDecl->isInvalidDecl(); +} + /// AddInitializerToDecl - Adds the initializer Init to the /// declaration dcl. If DirectInit is true, this is C++ direct /// initialization rather than copy initialization. -void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, - bool DirectInit, bool TypeMayContainAuto) { +void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) { // If there is no declaration, there was an error parsing it. Just ignore // the initializer. 
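// Illustrative sketch (not part of the patch): the reworked
// deduceVarTypeFromInitializer handles both placeholder kinds:
//
//   auto x;       // error: declaration of variable 'x' with deduced type
//                 // requires an initializer (err_auto_var_requires_init)
//   template<typename T> struct W { W(T); };
//   W w(42);      // class template argument deduction, routed through
//                 // DeduceTemplateSpecializationFromInitializer, yields W<int>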
if (!RealDecl || RealDecl->isInvalidDecl()) { @@ -9806,7 +9986,7 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, } // C++11 [decl.spec.auto]p6. Deduce the type which 'auto' stands in for. - if (TypeMayContainAuto && VDecl->getType()->isUndeducedType()) { + if (VDecl->getType()->isUndeducedType()) { // Attempt typo correction early so that the type of the init expression can // be deduced based on the chosen correction if the original init contains a // TypoExpr. @@ -9817,32 +9997,7 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, } Init = Res.get(); - QualType DeducedType = deduceVarTypeFromInitializer( - VDecl, VDecl->getDeclName(), VDecl->getType(), - VDecl->getTypeSourceInfo(), VDecl->getSourceRange(), DirectInit, Init); - if (DeducedType.isNull()) { - RealDecl->setInvalidDecl(); - return; - } - - VDecl->setType(DeducedType); - assert(VDecl->isLinkageValid()); - - // In ARC, infer lifetime. - if (getLangOpts().ObjCAutoRefCount && inferObjCARCLifetime(VDecl)) - VDecl->setInvalidDecl(); - - // If this is a redeclaration, check that the type we just deduced matches - // the previously declared type. - if (VarDecl *Old = VDecl->getPreviousDecl()) { - // We never need to merge the type, because we cannot form an incomplete - // array of auto, nor deduce such a type. - MergeVarDeclTypes(VDecl, Old, /*MergeTypeWithPrevious*/ false); - } - - // Check the deduced type is valid for a variable declaration. - CheckVariableDeclarationType(VDecl); - if (VDecl->isInvalidDecl()) + if (DeduceVariableDeclarationType(VDecl, DirectInit, Init)) return; } @@ -9961,15 +10116,8 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, } InitializedEntity Entity = InitializedEntity::InitializeVariable(VDecl); - InitializationKind Kind = - DirectInit - ? CXXDirectInit - ? InitializationKind::CreateDirect(VDecl->getLocation(), - Init->getLocStart(), - Init->getLocEnd()) - : InitializationKind::CreateDirectList(VDecl->getLocation()) - : InitializationKind::CreateCopy(VDecl->getLocation(), - Init->getLocStart()); + InitializationKind Kind = InitializationKind::CreateForInit( + VDecl->getLocation(), DirectInit, Init); MultiExprArg Args = Init; if (CXXDirectInit) @@ -10278,8 +10426,7 @@ bool Sema::canInitializeWithParenthesizedList(QualType TargetType) { TargetType->getContainedAutoType(); } -void Sema::ActOnUninitializedDecl(Decl *RealDecl, - bool TypeMayContainAuto) { +void Sema::ActOnUninitializedDecl(Decl *RealDecl) { // If there is no declaration, there was an error parsing it. Just ignore it. if (!RealDecl) return; @@ -10294,13 +10441,9 @@ void Sema::ActOnUninitializedDecl(Decl *RealDecl, return; } - // C++11 [dcl.spec.auto]p3 - if (TypeMayContainAuto && Type->getContainedAutoType()) { - Diag(Var->getLocation(), diag::err_auto_var_requires_init) - << Var->getDeclName() << Type; - Var->setInvalidDecl(); + if (Type->isUndeducedType() && + DeduceVariableDeclarationType(Var, false, nullptr)) return; - } // C++11 [class.static.data]p3: A static data member can be declared with // the constexpr specifier; if so, its declaration shall specify @@ -10679,7 +10822,7 @@ void Sema::CheckCompleteVariableDeclaration(VarDecl *var) { // Apply section attributes and pragmas to global variables. 
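// Illustrative sketch (not part of the patch): InitializationKind::
// CreateForInit folds the three initialization forms that were previously
// dispatched by hand (CreateDirect / CreateDirectList / CreateCopy):
//
//   int a = 1;  // copy-initialization
//   int b(2);   // direct-initialization
//   int c{3};   // direct-list-initialization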
bool GlobalStorage = var->hasGlobalStorage(); if (GlobalStorage && var->isThisDeclarationADefinition() && - ActiveTemplateInstantiations.empty()) { + !inTemplateInstantiation()) { PragmaStack *Stack = nullptr; int SectionFlags = ASTContext::PSF_Implicit | ASTContext::PSF_Read; if (var->getType().isConstQualified()) @@ -10851,7 +10994,8 @@ Sema::FinalizeDeclaration(Decl *ThisDecl) { if (unsigned MaxAlign = Context.getTargetInfo().getMaxTLSAlign()) { // Protect the check so that it's not performed on dependent types and // dependent alignments (we can't determine the alignment in that case). - if (VD->getTLSKind() && !hasDependentAlignment(VD)) { + if (VD->getTLSKind() && !hasDependentAlignment(VD) && + !VD->isInvalidDecl()) { CharUnits MaxAlignChars = Context.toCharUnitsFromBits(MaxAlign); if (Context.getDeclAlign(VD) > MaxAlignChars) { Diag(VD->getLocation(), diag::err_tls_var_aligned_over_maximum) @@ -11031,6 +11175,11 @@ Sema::FinalizeDeclaration(Decl *ThisDecl) { } } +static bool hasDeducedAuto(DeclaratorDecl *DD) { + auto *VD = dyn_cast(DD); + return VD && !VD->getType()->hasAutoForTrailingReturnType(); +} + Sema::DeclGroupPtrTy Sema::FinalizeDeclaratorGroup(Scope *S, const DeclSpec &DS, ArrayRef Group) { SmallVector Decls; @@ -11041,29 +11190,46 @@ Sema::DeclGroupPtrTy Sema::FinalizeDeclaratorGroup(Scope *S, const DeclSpec &DS, DeclaratorDecl *FirstDeclaratorInGroup = nullptr; DecompositionDecl *FirstDecompDeclaratorInGroup = nullptr; bool DiagnosedMultipleDecomps = false; + DeclaratorDecl *FirstNonDeducedAutoInGroup = nullptr; + bool DiagnosedNonDeducedAuto = false; for (unsigned i = 0, e = Group.size(); i != e; ++i) { if (Decl *D = Group[i]) { - auto *DD = dyn_cast(D); - if (DD && !FirstDeclaratorInGroup) - FirstDeclaratorInGroup = DD; - - auto *Decomp = dyn_cast(D); - if (Decomp && !FirstDecompDeclaratorInGroup) - FirstDecompDeclaratorInGroup = Decomp; - - // A decomposition declaration cannot be combined with any other - // declaration in the same group. - auto *OtherDD = FirstDeclaratorInGroup; - if (OtherDD == FirstDecompDeclaratorInGroup) - OtherDD = DD; - if (OtherDD && FirstDecompDeclaratorInGroup && - OtherDD != FirstDecompDeclaratorInGroup && - !DiagnosedMultipleDecomps) { - Diag(FirstDecompDeclaratorInGroup->getLocation(), - diag::err_decomp_decl_not_alone) - << OtherDD->getSourceRange(); - DiagnosedMultipleDecomps = true; + // For declarators, there are some additional syntactic-ish checks we need + // to perform. + if (auto *DD = dyn_cast(D)) { + if (!FirstDeclaratorInGroup) + FirstDeclaratorInGroup = DD; + if (!FirstDecompDeclaratorInGroup) + FirstDecompDeclaratorInGroup = dyn_cast(D); + if (!FirstNonDeducedAutoInGroup && DS.hasAutoTypeSpec() && + !hasDeducedAuto(DD)) + FirstNonDeducedAutoInGroup = DD; + + if (FirstDeclaratorInGroup != DD) { + // A decomposition declaration cannot be combined with any other + // declaration in the same group. + if (FirstDecompDeclaratorInGroup && !DiagnosedMultipleDecomps) { + Diag(FirstDecompDeclaratorInGroup->getLocation(), + diag::err_decomp_decl_not_alone) + << FirstDeclaratorInGroup->getSourceRange() + << DD->getSourceRange(); + DiagnosedMultipleDecomps = true; + } + + // A declarator that uses 'auto' in any way other than to declare a + // variable with a deduced type cannot be combined with any other + // declarator in the same group. 
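// Illustrative sketch (not part of the patch): the group check added here
// rejects mixing a trailing-return-type 'auto' declarator with others:
//
//   auto i = 0, *p = &i;     // OK: every declarator deduces from its init
//   auto f() -> int, j = 0;  // error: the 'auto' function declarator must
//                            // be the only declarator in its group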
+ if (FirstNonDeducedAutoInGroup && !DiagnosedNonDeducedAuto) { + Diag(FirstNonDeducedAutoInGroup->getLocation(), + diag::err_auto_non_deduced_not_alone) + << FirstNonDeducedAutoInGroup->getType() + ->hasAutoForTrailingReturnType() + << FirstDeclaratorInGroup->getSourceRange() + << DD->getSourceRange(); + DiagnosedNonDeducedAuto = true; + } + } } Decls.push_back(D); @@ -11079,50 +11245,40 @@ Sema::DeclGroupPtrTy Sema::FinalizeDeclaratorGroup(Scope *S, const DeclSpec &DS, } } - return BuildDeclaratorGroup(Decls, DS.containsPlaceholderType()); + return BuildDeclaratorGroup(Decls); } /// BuildDeclaratorGroup - convert a list of declarations into a declaration /// group, performing any necessary semantic checking. Sema::DeclGroupPtrTy -Sema::BuildDeclaratorGroup(MutableArrayRef Group, - bool TypeMayContainAuto) { - // C++0x [dcl.spec.auto]p7: - // If the type deduced for the template parameter U is not the same in each +Sema::BuildDeclaratorGroup(MutableArrayRef Group) { + // C++14 [dcl.spec.auto]p7: (DR1347) + // If the type that replaces the placeholder type is not the same in each // deduction, the program is ill-formed. - // FIXME: When initializer-list support is added, a distinction is needed - // between the deduced type U and the deduced type which 'auto' stands for. - // auto a = 0, b = { 1, 2, 3 }; - // is legal because the deduced type U is 'int' in both cases. - if (TypeMayContainAuto && Group.size() > 1) { + if (Group.size() > 1) { QualType Deduced; - CanQualType DeducedCanon; VarDecl *DeducedDecl = nullptr; for (unsigned i = 0, e = Group.size(); i != e; ++i) { - if (VarDecl *D = dyn_cast(Group[i])) { - AutoType *AT = D->getType()->getContainedAutoType(); - // Don't reissue diagnostics when instantiating a template. - if (AT && D->isInvalidDecl()) - break; - QualType U = AT ? AT->getDeducedType() : QualType(); - if (!U.isNull()) { - CanQualType UCanon = Context.getCanonicalType(U); - if (Deduced.isNull()) { - Deduced = U; - DeducedCanon = UCanon; - DeducedDecl = D; - } else if (DeducedCanon != UCanon) { - Diag(D->getTypeSourceInfo()->getTypeLoc().getBeginLoc(), - diag::err_auto_different_deductions) - << (unsigned)AT->getKeyword() - << Deduced << DeducedDecl->getDeclName() - << U << D->getDeclName() - << DeducedDecl->getInit()->getSourceRange() - << D->getInit()->getSourceRange(); - D->setInvalidDecl(); - break; - } - } + VarDecl *D = dyn_cast(Group[i]); + if (!D || D->isInvalidDecl()) + break; + DeducedType *DT = D->getType()->getContainedDeducedType(); + if (!DT || DT->getDeducedType().isNull()) + continue; + if (Deduced.isNull()) { + Deduced = DT->getDeducedType(); + DeducedDecl = D; + } else if (!Context.hasSameType(DT->getDeducedType(), Deduced)) { + auto *AT = dyn_cast(DT); + Diag(D->getTypeSourceInfo()->getTypeLoc().getBeginLoc(), + diag::err_auto_different_deductions) + << (AT ? (unsigned)AT->getKeyword() : 3) + << Deduced << DeducedDecl->getDeclName() + << DT->getDeducedType() << D->getDeclName() + << DeducedDecl->getInit()->getSourceRange() + << D->getInit()->getSourceRange(); + D->setInvalidDecl(); + break; } } } @@ -11317,7 +11473,7 @@ ParmVarDecl *Sema::BuildParmVarDeclForTypedef(DeclContext *DC, void Sema::DiagnoseUnusedParameters(ArrayRef Parameters) { // Don't diagnose unused-parameter errors in template instantiations; we // will already have done so in the template itself. 
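// Illustrative sketch (not part of the patch): C++14 [dcl.spec.auto]p7 as
// enforced by the rewritten BuildDeclaratorGroup:
//
//   auto a = 1, b = 2;    // OK: both declarators deduce int
//   auto c = 1, d = 2.0;  // error: 'auto' deduced as 'int' for 'c' but as
//                         // 'double' for 'd' (err_auto_different_deductions)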
- if (!ActiveTemplateInstantiations.empty()) + if (inTemplateInstantiation()) return; for (const ParmVarDecl *Parameter : Parameters) { @@ -11535,8 +11691,6 @@ void Sema::CheckForFunctionRedefinition(FunctionDecl *FD, const FunctionDecl *EffectiveDefinition, SkipBodyInfo *SkipBody) { - // Don't complain if we're in GNU89 mode and the previous definition - // was an extern inline function. const FunctionDecl *Definition = EffectiveDefinition; if (!Definition) if (!FD->isDefined(Definition)) @@ -11621,9 +11775,6 @@ static void RebuildLambdaScopeInfo(CXXMethodDecl *CallOperator, Decl *Sema::ActOnStartOfFunctionDef(Scope *FnBodyScope, Decl *D, SkipBodyInfo *SkipBody) { - // Clear the last template instantiation error context. - LastTemplateInstantiationErrorContext = ActiveTemplateInstantiation(); - if (!D) return D; FunctionDecl *FD = nullptr; @@ -11633,6 +11784,18 @@ Decl *Sema::ActOnStartOfFunctionDef(Scope *FnBodyScope, Decl *D, else FD = cast(D); + // Check for defining attributes before the check for redefinition. + if (const auto *Attr = FD->getAttr()) { + Diag(Attr->getLocation(), diag::err_alias_is_definition) << FD << 0; + FD->dropAttr(); + FD->setInvalidDecl(); + } + if (const auto *Attr = FD->getAttr()) { + Diag(Attr->getLocation(), diag::err_alias_is_definition) << FD << 1; + FD->dropAttr(); + FD->setInvalidDecl(); + } + // See if this is a redefinition. if (!FD->isLateTemplateParsed()) { CheckForFunctionRedefinition(FD, nullptr, SkipBody); @@ -11657,14 +11820,14 @@ Decl *Sema::ActOnStartOfFunctionDef(Scope *FnBodyScope, Decl *D, // captures during transformation of nested lambdas, it is necessary to // have the LSI properly restored. if (isGenericLambdaCallOperatorSpecialization(FD)) { - assert(ActiveTemplateInstantiations.size() && - "There should be an active template instantiation on the stack " - "when instantiating a generic lambda!"); + assert(inTemplateInstantiation() && + "There should be an active template instantiation on the stack " + "when instantiating a generic lambda!"); RebuildLambdaScopeInfo(cast(D), *this); - } - else + } else { // Enter a new function scope PushFunctionScope(); + } // Builtin functions cannot be defined. if (unsigned BuiltinID = FD->getBuiltinID()) { @@ -11779,7 +11942,7 @@ bool Sema::canDelayFunctionBody(const Declarator &D) { // We can't delay parsing the body of a function template with a deduced // return type (yet). - if (D.getDeclSpec().containsPlaceholderType()) { + if (D.getDeclSpec().hasAutoTypeSpec()) { // If the placeholder introduces a non-deduced trailing return type, // we can still delay parsing it. 
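// Illustrative sketch (not part of the patch): the distinction drawn by
// canDelayFunctionBody for function templates:
//
//   auto f() { return 0; }         // deduced return type: the body must be
//                                  // parsed eagerly
//   auto g() -> int { return 0; }  // non-deduced trailing return type: body
//                                  // parsing may still be delayed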
if (D.getNumTypeObjects()) { @@ -11827,7 +11990,7 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body, sema::AnalysisBasedWarnings::Policy WP = AnalysisWarnings.getDefaultPolicy(); sema::AnalysisBasedWarnings::Policy *ActivePolicy = nullptr; - if (getLangOpts().CoroutinesTS && !getCurFunction()->CoroutineStmts.empty()) + if (getLangOpts().CoroutinesTS && getCurFunction()->CoroutinePromise) CheckCompletedCoroutineBody(FD, Body); if (FD) { @@ -11948,7 +12111,7 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body, !LangOpts.CPlusPlus) { TypeSourceInfo *TI = FD->getTypeSourceInfo(); TypeLoc TL = TI->getTypeLoc(); - FunctionTypeLoc FTL = TL.castAs(); + FunctionTypeLoc FTL = TL.getAsAdjusted(); Diag(FTL.getLParenLoc(), diag::warn_strict_prototypes) << 1; } } @@ -12541,7 +12704,7 @@ bool Sema::isAcceptableTagRedeclaration(const TagDecl *Previous, if (const CXXRecordDecl *Record = dyn_cast(Previous)) isTemplate = Record->getDescribedClassTemplate(); - if (!ActiveTemplateInstantiations.empty()) { + if (inTemplateInstantiation()) { // In a template instantiation, do not offer fix-its for tag mismatches // since they usually mess up the template instead of fixing the problem. Diag(NewTagLoc, diag::warn_struct_class_tag_mismatch) @@ -12697,8 +12860,8 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK, TagTypeKind Kind = TypeWithKeyword::getTagTypeKindForTypeSpec(TagSpec); bool ScopedEnum = ScopedEnumKWLoc.isValid(); - // FIXME: Check explicit specializations more carefully. - bool isExplicitSpecialization = false; + // FIXME: Check member specializations more carefully. + bool isMemberSpecialization = false; bool Invalid = false; // We only need to do this matching if we have template parameters @@ -12709,7 +12872,7 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK, if (TemplateParameterList *TemplateParams = MatchTemplateParametersToScopeSpecifier( KWLoc, NameLoc, SS, nullptr, TemplateParameterLists, - TUK == TUK_Friend, isExplicitSpecialization, Invalid)) { + TUK == TUK_Friend, isMemberSpecialization, Invalid)) { if (Kind == TTK_Enum) { Diag(KWLoc, diag::err_enum_template); return nullptr; @@ -12736,7 +12899,7 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK, // The "template<>" header is extraneous. Diag(TemplateParams->getTemplateLoc(), diag::err_template_tag_noparams) << TypeWithKeyword::getTagTypeKindName(Kind) << Name; - isExplicitSpecialization = true; + isMemberSpecialization = true; } } } @@ -13056,7 +13219,7 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK, if (auto *Shadow = dyn_cast(DirectPrevDecl)) { auto *OldTag = dyn_cast(PrevDecl); if (SS.isEmpty() && TUK != TUK_Reference && TUK != TUK_Friend && - isDeclInScope(Shadow, SearchDC, S, isExplicitSpecialization) && + isDeclInScope(Shadow, SearchDC, S, isMemberSpecialization) && !(OldTag && isAcceptableTagRedeclContext( *this, OldTag->getDeclContext(), SearchDC))) { Diag(KWLoc, diag::err_using_decl_conflict_reverse); @@ -13076,7 +13239,7 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK, // rementions the tag), reuse the decl. if (TUK == TUK_Reference || TUK == TUK_Friend || isDeclInScope(DirectPrevDecl, SearchDC, S, - SS.isNotEmpty() || isExplicitSpecialization)) { + SS.isNotEmpty() || isMemberSpecialization)) { // Make sure that this wasn't declared as an enum and now used as a // struct or something similar. 
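// Illustrative sketch (not part of the patch): the tag redeclaration checks
// here reject tag-kind mismatches such as:
//
//   enum E { e };
//   struct E *p;  // error: use of 'E' with a tag type that does not match
//                 // its previous declaration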
if (!isAcceptableTagRedeclaration(PrevTagDecl, Kind, @@ -13181,7 +13344,7 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK, // is from an implicit instantiation, don't emit an error // here; we'll catch this in the general case below. bool IsExplicitSpecializationAfterInstantiation = false; - if (isExplicitSpecialization) { + if (isMemberSpecialization) { if (CXXRecordDecl *RD = dyn_cast(Def)) IsExplicitSpecializationAfterInstantiation = RD->getTemplateSpecializationKind() != @@ -13275,7 +13438,7 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK, // Otherwise, only diagnose if the declaration is in scope. } else if (!isDeclInScope(DirectPrevDecl, SearchDC, S, - SS.isNotEmpty() || isExplicitSpecialization)) { + SS.isNotEmpty() || isMemberSpecialization)) { // do nothing // Diagnose implicit declarations introduced by elaborated types. @@ -13407,7 +13570,7 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK, // for explicit specializations, because they have similar checking // (with more specific diagnostics) in the call to // CheckMemberSpecialization, below. - if (!isExplicitSpecialization && + if (!isMemberSpecialization && (TUK == TUK_Definition || TUK == TUK_Declaration) && diagnoseQualifiedDeclaration(SS, DC, OrigName, Loc)) Invalid = true; @@ -13438,7 +13601,7 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK, } if (ModulePrivateLoc.isValid()) { - if (isExplicitSpecialization) + if (isMemberSpecialization) Diag(New->getLocation(), diag::err_module_private_specialization) << 2 << FixItHint::CreateRemoval(ModulePrivateLoc); @@ -13451,7 +13614,7 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK, // If this is a specialization of a member class (of a class template), // check the specialization. - if (isExplicitSpecialization && CheckMemberSpecialization(New, Previous)) + if (isMemberSpecialization && CheckMemberSpecialization(New, Previous)) Invalid = true; // If we're declaring or defining a tag in function prototype scope in C, @@ -13475,9 +13638,6 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK, if (Invalid) New->setInvalidDecl(); - if (Attr) - ProcessDeclAttributeList(S, New, Attr); - // Set the lexical context. If the tag has a C++ scope specifier, the // lexical context will be different from the semantic context. New->setLexicalDeclContext(CurContext); @@ -13496,6 +13656,9 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK, if (TUK == TUK_Definition) New->startDefinition(); + if (Attr) + ProcessDeclAttributeList(S, New, Attr); + // If this has an identifier, add it to the scope stack. if (TUK == TUK_Friend) { // We might be replacing an existing declaration in the lookup tables; @@ -13618,8 +13781,11 @@ void Sema::ActOnTagFinishDefinition(Scope *S, Decl *TagD, RD->completeDefinition(); } - if (isa(Tag)) + if (auto *RD = dyn_cast(Tag)) { FieldCollector->FinishClass(); + if (Context.getLangOpts().Modules) + RD->computeODRHash(); + } // Exit this scope of this tag's definition. 
PopDeclContext(); diff --git a/tools/clang/lib/Sema/SemaOverload.cpp b/tools/clang/lib/Sema/SemaOverload.cpp index ed74219..8f5c1b0 100644 --- a/tools/clang/lib/Sema/SemaOverload.cpp +++ b/tools/clang/lib/Sema/SemaOverload.cpp @@ -29,6 +29,7 @@ #include "clang/Sema/Template.h" #include "clang/Sema/TemplateDeduction.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" @@ -329,6 +330,11 @@ StandardConversionSequence::getNarrowingKind(ASTContext &Ctx, } else if (FromType->isIntegralType(Ctx) && ToType->isRealFloatingType()) { llvm::APSInt IntConstantValue; const Expr *Initializer = IgnoreNarrowingConversion(Converted); + + // If it's value-dependent, we can't tell whether it's narrowing. + if (Initializer->isValueDependent()) + return NK_Dependent_Narrowing; + if (Initializer && Initializer->isIntegerConstantExpr(IntConstantValue, Ctx)) { // Convert the integer to the floating type. @@ -362,6 +368,11 @@ StandardConversionSequence::getNarrowingKind(ASTContext &Ctx, Ctx.getFloatingTypeOrder(FromType, ToType) == 1) { // FromType is larger than ToType. const Expr *Initializer = IgnoreNarrowingConversion(Converted); + + // If it's value-dependent, we can't tell whether it's narrowing. + if (Initializer->isValueDependent()) + return NK_Dependent_Narrowing; + if (Initializer->isCXX11ConstantExpr(Ctx, &ConstantValue)) { // Constant! assert(ConstantValue.isFloat()); @@ -403,6 +414,11 @@ StandardConversionSequence::getNarrowingKind(ASTContext &Ctx, // Not all values of FromType can be represented in ToType. llvm::APSInt InitializerValue; const Expr *Initializer = IgnoreNarrowingConversion(Converted); + + // If it's value-dependent, we can't tell whether it's narrowing. + if (Initializer->isValueDependent()) + return NK_Dependent_Narrowing; + if (!Initializer->isIntegerConstantExpr(InitializerValue, Ctx)) { // Such conversions on variables are always narrowing. return NK_Variable_Narrowing; @@ -574,7 +590,6 @@ clang::MakeDeductionFailureInfo(ASTContext &Context, Result.Result = static_cast(TDK); Result.HasDiagnostic = false; switch (TDK) { - case Sema::TDK_Success: case Sema::TDK_Invalid: case Sema::TDK_InstantiationDepth: case Sema::TDK_TooManyArguments: @@ -589,7 +604,8 @@ clang::MakeDeductionFailureInfo(ASTContext &Context, Result.Data = Info.Param.getOpaqueValue(); break; - case Sema::TDK_DeducedMismatch: { + case Sema::TDK_DeducedMismatch: + case Sema::TDK_DeducedMismatchNested: { // FIXME: Should allocate from normal heap so that we can free this later. auto *Saved = new (Context) DFIDeducedMismatchArgs; Saved->FirstArg = Info.FirstArg; @@ -630,9 +646,9 @@ clang::MakeDeductionFailureInfo(ASTContext &Context, } break; - case Sema::TDK_FailedOverloadResolution: - Result.Data = Info.Expression; - break; + case Sema::TDK_Success: + case Sema::TDK_NonDependentConversionFailure: + llvm_unreachable("not a deduction failure"); } return Result; @@ -647,13 +663,14 @@ void DeductionFailureInfo::Destroy() { case Sema::TDK_TooManyArguments: case Sema::TDK_TooFewArguments: case Sema::TDK_InvalidExplicitArguments: - case Sema::TDK_FailedOverloadResolution: case Sema::TDK_CUDATargetMismatch: + case Sema::TDK_NonDependentConversionFailure: break; case Sema::TDK_Inconsistent: case Sema::TDK_Underqualified: case Sema::TDK_DeducedMismatch: + case Sema::TDK_DeducedMismatchNested: case Sema::TDK_NonDeducedMismatch: // FIXME: Destroy the data? 
Data = nullptr; @@ -689,9 +706,10 @@ TemplateParameter DeductionFailureInfo::getTemplateParameter() { case Sema::TDK_TooFewArguments: case Sema::TDK_SubstitutionFailure: case Sema::TDK_DeducedMismatch: + case Sema::TDK_DeducedMismatchNested: case Sema::TDK_NonDeducedMismatch: - case Sema::TDK_FailedOverloadResolution: case Sema::TDK_CUDATargetMismatch: + case Sema::TDK_NonDependentConversionFailure: return TemplateParameter(); case Sema::TDK_Incomplete: @@ -722,11 +740,12 @@ TemplateArgumentList *DeductionFailureInfo::getTemplateArgumentList() { case Sema::TDK_Inconsistent: case Sema::TDK_Underqualified: case Sema::TDK_NonDeducedMismatch: - case Sema::TDK_FailedOverloadResolution: case Sema::TDK_CUDATargetMismatch: + case Sema::TDK_NonDependentConversionFailure: return nullptr; case Sema::TDK_DeducedMismatch: + case Sema::TDK_DeducedMismatchNested: return static_cast(Data)->TemplateArgs; case Sema::TDK_SubstitutionFailure: @@ -750,13 +769,14 @@ const TemplateArgument *DeductionFailureInfo::getFirstArg() { case Sema::TDK_TooFewArguments: case Sema::TDK_InvalidExplicitArguments: case Sema::TDK_SubstitutionFailure: - case Sema::TDK_FailedOverloadResolution: case Sema::TDK_CUDATargetMismatch: + case Sema::TDK_NonDependentConversionFailure: return nullptr; case Sema::TDK_Inconsistent: case Sema::TDK_Underqualified: case Sema::TDK_DeducedMismatch: + case Sema::TDK_DeducedMismatchNested: case Sema::TDK_NonDeducedMismatch: return &static_cast(Data)->FirstArg; @@ -778,13 +798,14 @@ const TemplateArgument *DeductionFailureInfo::getSecondArg() { case Sema::TDK_TooFewArguments: case Sema::TDK_InvalidExplicitArguments: case Sema::TDK_SubstitutionFailure: - case Sema::TDK_FailedOverloadResolution: case Sema::TDK_CUDATargetMismatch: + case Sema::TDK_NonDependentConversionFailure: return nullptr; case Sema::TDK_Inconsistent: case Sema::TDK_Underqualified: case Sema::TDK_DeducedMismatch: + case Sema::TDK_DeducedMismatchNested: case Sema::TDK_NonDeducedMismatch: return &static_cast(Data)->SecondArg; @@ -796,26 +817,21 @@ const TemplateArgument *DeductionFailureInfo::getSecondArg() { return nullptr; } -Expr *DeductionFailureInfo::getExpr() { - if (static_cast(Result) == - Sema::TDK_FailedOverloadResolution) - return static_cast(Data); - - return nullptr; -} - llvm::Optional DeductionFailureInfo::getCallArgIndex() { - if (static_cast(Result) == - Sema::TDK_DeducedMismatch) + switch (static_cast(Result)) { + case Sema::TDK_DeducedMismatch: + case Sema::TDK_DeducedMismatchNested: return static_cast(Data)->CallArgIndex; - return llvm::None; + default: + return llvm::None; + } } void OverloadCandidateSet::destroyCandidates() { for (iterator i = begin(), e = end(); i != e; ++i) { - for (unsigned ii = 0, ie = i->NumConversions; ii != ie; ++ii) - i->Conversions[ii].~ImplicitConversionSequence(); + for (auto &C : i->Conversions) + C.~ImplicitConversionSequence(); if (!i->Viable && i->FailureKind == ovl_fail_bad_deduction) i->DeductionFailure.Destroy(); } @@ -823,8 +839,8 @@ void OverloadCandidateSet::destroyCandidates() { void OverloadCandidateSet::clear() { destroyCandidates(); - ConversionSequenceAllocator.Reset(); - NumInlineSequences = 0; + SlabAllocator.Reset(); + NumInlineBytesUsed = 0; Candidates.clear(); Functions.clear(); } @@ -981,16 +997,23 @@ Sema::CheckOverload(Scope *S, FunctionDecl *New, const LookupResult &Old, Match = *I; return Ovl_Match; } - } else if (isa(OldD)) { + } else if (isa(OldD) || isa(OldD)) { // We can overload with these, which can show up when doing // redeclaration checks for 
UsingDecls. assert(Old.getLookupKind() == LookupUsingDeclName); } else if (isa(OldD)) { // We can always overload with tags by hiding them. - } else if (isa(OldD)) { + } else if (auto *UUD = dyn_cast(OldD)) { // Optimistically assume that an unresolved using decl will // overload; if it doesn't, we'll have to diagnose during // template instantiation. + // + // Exception: if the scope is dependent and this is not a class + // member, the using declaration can only introduce an enumerator. + if (UUD->getQualifier()->isDependent() && !UUD->isCXXClassMember()) { + Match = *I; + return Ovl_NonFunction; + } } else { // (C++ 13p1): // Only function declarations can be overloaded; object and type @@ -1778,6 +1801,11 @@ static bool IsStandardConversion(Sema &S, Expr* From, QualType ToType, From->EvaluateKnownConstInt(S.getASTContext()) == 0) { SCS.Second = ICK_Zero_Event_Conversion; FromType = ToType; + } else if (ToType->isQueueT() && + From->isIntegerConstantExpr(S.getASTContext()) && + (From->EvaluateKnownConstInt(S.getASTContext()) == 0)) { + SCS.Second = ICK_Zero_Queue_Conversion; + FromType = ToType; } else { // No second conversion required. SCS.Second = ICK_Identity; @@ -4236,7 +4264,7 @@ Sema::CompareReferenceRelationship(SourceLocation Loc, return Ref_Related; } -/// \brief Look for a user-defined conversion to an value reference-compatible +/// \brief Look for a user-defined conversion to a value reference-compatible /// with DeclType. Return true if something definite is found. static bool FindConversionForRefInit(Sema &S, ImplicitConversionSequence &ICS, @@ -5155,6 +5183,7 @@ static bool CheckConvertedConstantConversions(Sema &S, case ICK_Function_Conversion: case ICK_Integral_Promotion: case ICK_Integral_Conversion: // Narrowing conversions are checked elsewhere. + case ICK_Zero_Queue_Conversion: return true; case ICK_Boolean_Conversion: @@ -5282,6 +5311,9 @@ static ExprResult CheckConvertedConstantExpression(Sema &S, Expr *From, QualType PreNarrowingType; switch (SCS->getNarrowingKind(S.Context, Result.get(), PreNarrowingValue, PreNarrowingType)) { + case NK_Dependent_Narrowing: + // Implicit conversion to a narrower type, but the expression is + // value-dependent so we can't tell whether it's actually narrowing. case NK_Variable_Narrowing: // Implicit conversion to a narrower type, and the value is not a constant // expression. We'll diagnose this in a moment. @@ -5300,6 +5332,11 @@ static ExprResult CheckConvertedConstantExpression(Sema &S, Expr *From, break; } + if (Result.get()->isValueDependent()) { + Value = APValue(); + return Result; + } + // Check the expression is a constant expression. SmallVector Notes; Expr::EvalResult Eval; @@ -5346,7 +5383,7 @@ ExprResult Sema::CheckConvertedConstantExpression(Expr *From, QualType T, APValue V; auto R = ::CheckConvertedConstantExpression(*this, From, T, V, CCE, true); - if (!R.isInvalid()) + if (!R.isInvalid() && !R.get()->isValueDependent()) Value = V.getInt(); return R; } @@ -5801,7 +5838,8 @@ Sema::AddOverloadCandidate(FunctionDecl *Function, OverloadCandidateSet &CandidateSet, bool SuppressUserConversions, bool PartialOverloading, - bool AllowExplicit) { + bool AllowExplicit, + ConversionSequenceList EarlyConversions) { const FunctionProtoType *Proto = dyn_cast(Function->getType()->getAs()); assert(Proto && "Functions without a prototype cannot be overloaded"); @@ -5817,10 +5855,10 @@ Sema::AddOverloadCandidate(FunctionDecl *Function, // function, e.g., X::f(). 
We use an empty type for the implied // object argument (C++ [over.call.func]p3), and the acting context // is irrelevant. - AddMethodCandidate(Method, FoundDecl, Method->getParent(), - QualType(), Expr::Classification::makeSimpleLValue(), - Args, CandidateSet, SuppressUserConversions, - PartialOverloading); + AddMethodCandidate(Method, FoundDecl, Method->getParent(), QualType(), + Expr::Classification::makeSimpleLValue(), Args, + CandidateSet, SuppressUserConversions, + PartialOverloading, EarlyConversions); return; } // We treat a constructor like a non-member function, since its object @@ -5853,7 +5891,8 @@ Sema::AddOverloadCandidate(FunctionDecl *Function, EnterExpressionEvaluationContext Unevaluated(*this, Sema::Unevaluated); // Add this candidate - OverloadCandidate &Candidate = CandidateSet.addCandidate(Args.size()); + OverloadCandidate &Candidate = + CandidateSet.addCandidate(Args.size(), EarlyConversions); Candidate.FoundDecl = FoundDecl; Candidate.Function = Function; Candidate.Viable = true; @@ -5874,6 +5913,28 @@ Sema::AddOverloadCandidate(FunctionDecl *Function, Candidate.FailureKind = ovl_fail_illegal_constructor; return; } + + // C++ [over.match.funcs]p8: (proposed DR resolution) + // A constructor inherited from class type C that has a first parameter + // of type "reference to P" (including such a constructor instantiated + // from a template) is excluded from the set of candidate functions when + // constructing an object of type cv D if the argument list has exactly + // one argument and D is reference-related to P and P is reference-related + // to C. + auto *Shadow = dyn_cast(FoundDecl.getDecl()); + if (Shadow && Args.size() == 1 && Constructor->getNumParams() >= 1 && + Constructor->getParamDecl(0)->getType()->isReferenceType()) { + QualType P = Constructor->getParamDecl(0)->getType()->getPointeeType(); + QualType C = Context.getRecordType(Constructor->getParent()); + QualType D = Context.getRecordType(Shadow->getParent()); + SourceLocation Loc = Args.front()->getExprLoc(); + if ((Context.hasSameUnqualifiedType(P, C) || IsDerivedFrom(Loc, P, C)) && + (Context.hasSameUnqualifiedType(D, P) || IsDerivedFrom(Loc, D, P))) { + Candidate.Viable = false; + Candidate.FailureKind = ovl_fail_inhctor_slice; + return; + } + } } unsigned NumParams = Proto->getNumParams(); @@ -5917,7 +5978,10 @@ Sema::AddOverloadCandidate(FunctionDecl *Function, // Determine the implicit conversion sequences for each of the // arguments. for (unsigned ArgIdx = 0; ArgIdx < Args.size(); ++ArgIdx) { - if (ArgIdx < NumParams) { + if (Candidate.Conversions[ArgIdx].isInitialized()) { + // We already formed a conversion sequence for this parameter during + // template argument deduction. 
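// Illustrative sketch (not part of the patch): the [over.match.funcs]p8
// exclusion implemented above keeps an inherited constructor from slicing:
//
//   struct B { B(); B(const B&); };
//   struct D : B { using B::B; };
//   D d1;
//   D d2(d1);  // inherited B(const B&) is excluded from the candidate set;
//              // D's implicit copy constructor is chosen instead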
+ } else if (ArgIdx < NumParams) { // (C++ 13.3.2p3): for F to be a viable function, there shall // exist for each argument an implicit conversion sequence // (13.3.3.1) that converts that argument to the corresponding @@ -5949,6 +6013,12 @@ Sema::AddOverloadCandidate(FunctionDecl *Function, Candidate.DeductionFailure.Data = FailedAttr; return; } + + if (LangOpts.OpenCL && isOpenCLDisabledDecl(Function)) { + Candidate.Viable = false; + Candidate.FailureKind = ovl_fail_ext_disabled; + return; + } } ObjCMethodDecl * @@ -6061,66 +6131,87 @@ getOrderedEnableIfAttrs(const FunctionDecl *Function) { return Result; } -EnableIfAttr *Sema::CheckEnableIf(FunctionDecl *Function, ArrayRef Args, - bool MissingImplicitThis) { - auto EnableIfAttrs = getOrderedEnableIfAttrs(Function); - if (EnableIfAttrs.empty()) - return nullptr; - - SFINAETrap Trap(*this); - SmallVector ConvertedArgs; - bool InitializationFailed = false; +static bool +convertArgsForAvailabilityChecks(Sema &S, FunctionDecl *Function, Expr *ThisArg, + ArrayRef Args, Sema::SFINAETrap &Trap, + bool MissingImplicitThis, Expr *&ConvertedThis, + SmallVectorImpl &ConvertedArgs) { + if (ThisArg) { + CXXMethodDecl *Method = cast(Function); + assert(!isa(Method) && + "Shouldn't have `this` for ctors!"); + assert(!Method->isStatic() && "Shouldn't have `this` for static methods!"); + ExprResult R = S.PerformObjectArgumentInitialization( + ThisArg, /*Qualifier=*/nullptr, Method, Method); + if (R.isInvalid()) + return false; + ConvertedThis = R.get(); + } else { + if (auto *MD = dyn_cast(Function)) { + (void)MD; + assert((MissingImplicitThis || MD->isStatic() || + isa(MD)) && + "Expected `this` for non-ctor instance methods"); + } + ConvertedThis = nullptr; + } // Ignore any variadic arguments. Converting them is pointless, since the - // user can't refer to them in the enable_if condition. + // user can't refer to them in the function condition. unsigned ArgSizeNoVarargs = std::min(Function->param_size(), Args.size()); // Convert the arguments. for (unsigned I = 0; I != ArgSizeNoVarargs; ++I) { ExprResult R; - if (I == 0 && !MissingImplicitThis && isa(Function) && - !cast(Function)->isStatic() && - !isa(Function)) { - CXXMethodDecl *Method = cast(Function); - R = PerformObjectArgumentInitialization(Args[0], /*Qualifier=*/nullptr, - Method, Method); - } else { - R = PerformCopyInitialization(InitializedEntity::InitializeParameter( - Context, Function->getParamDecl(I)), + R = S.PerformCopyInitialization(InitializedEntity::InitializeParameter( + S.Context, Function->getParamDecl(I)), SourceLocation(), Args[I]); - } - if (R.isInvalid()) { - InitializationFailed = true; - break; - } + if (R.isInvalid()) + return false; ConvertedArgs.push_back(R.get()); } - if (InitializationFailed || Trap.hasErrorOccurred()) - return EnableIfAttrs[0]; + if (Trap.hasErrorOccurred()) + return false; // Push default arguments if needed. if (!Function->isVariadic() && Args.size() < Function->getNumParams()) { for (unsigned i = Args.size(), e = Function->getNumParams(); i != e; ++i) { ParmVarDecl *P = Function->getParamDecl(i); - ExprResult R = PerformCopyInitialization( - InitializedEntity::InitializeParameter(Context, + ExprResult R = S.PerformCopyInitialization( + InitializedEntity::InitializeParameter(S.Context, Function->getParamDecl(i)), SourceLocation(), P->hasUninstantiatedDefaultArg() ? 
@@ -6061,66 +6131,87 @@ getOrderedEnableIfAttrs(const FunctionDecl *Function) {
   return Result;
 }
 
-EnableIfAttr *Sema::CheckEnableIf(FunctionDecl *Function, ArrayRef<Expr *> Args,
-                                  bool MissingImplicitThis) {
-  auto EnableIfAttrs = getOrderedEnableIfAttrs(Function);
-  if (EnableIfAttrs.empty())
-    return nullptr;
-
-  SFINAETrap Trap(*this);
-  SmallVector<Expr *, 16> ConvertedArgs;
-  bool InitializationFailed = false;
+static bool
+convertArgsForAvailabilityChecks(Sema &S, FunctionDecl *Function, Expr *ThisArg,
+                                 ArrayRef<Expr *> Args, Sema::SFINAETrap &Trap,
+                                 bool MissingImplicitThis, Expr *&ConvertedThis,
+                                 SmallVectorImpl<Expr *> &ConvertedArgs) {
+  if (ThisArg) {
+    CXXMethodDecl *Method = cast<CXXMethodDecl>(Function);
+    assert(!isa<CXXConstructorDecl>(Method) &&
+           "Shouldn't have `this` for ctors!");
+    assert(!Method->isStatic() && "Shouldn't have `this` for static methods!");
+    ExprResult R = S.PerformObjectArgumentInitialization(
+        ThisArg, /*Qualifier=*/nullptr, Method, Method);
+    if (R.isInvalid())
+      return false;
+    ConvertedThis = R.get();
+  } else {
+    if (auto *MD = dyn_cast<CXXMethodDecl>(Function)) {
+      (void)MD;
+      assert((MissingImplicitThis || MD->isStatic() ||
+              isa<CXXConstructorDecl>(MD)) &&
+             "Expected `this` for non-ctor instance methods");
+    }
+    ConvertedThis = nullptr;
+  }
 
   // Ignore any variadic arguments. Converting them is pointless, since the
-  // user can't refer to them in the enable_if condition.
+  // user can't refer to them in the function condition.
   unsigned ArgSizeNoVarargs = std::min(Function->param_size(), Args.size());
 
   // Convert the arguments.
   for (unsigned I = 0; I != ArgSizeNoVarargs; ++I) {
     ExprResult R;
-    if (I == 0 && !MissingImplicitThis && isa<CXXMethodDecl>(Function) &&
-        !cast<CXXMethodDecl>(Function)->isStatic() &&
-        !isa<CXXConstructorDecl>(Function)) {
-      CXXMethodDecl *Method = cast<CXXMethodDecl>(Function);
-      R = PerformObjectArgumentInitialization(Args[0], /*Qualifier=*/nullptr,
-                                              Method, Method);
-    } else {
-      R = PerformCopyInitialization(InitializedEntity::InitializeParameter(
-                                        Context, Function->getParamDecl(I)),
+    R = S.PerformCopyInitialization(InitializedEntity::InitializeParameter(
+                                        S.Context, Function->getParamDecl(I)),
                                     SourceLocation(), Args[I]);
-    }
-    if (R.isInvalid()) {
-      InitializationFailed = true;
-      break;
-    }
+    if (R.isInvalid())
+      return false;
     ConvertedArgs.push_back(R.get());
   }
 
-  if (InitializationFailed || Trap.hasErrorOccurred())
-    return EnableIfAttrs[0];
+  if (Trap.hasErrorOccurred())
+    return false;
 
   // Push default arguments if needed.
   if (!Function->isVariadic() && Args.size() < Function->getNumParams()) {
     for (unsigned i = Args.size(), e = Function->getNumParams(); i != e; ++i) {
       ParmVarDecl *P = Function->getParamDecl(i);
-      ExprResult R = PerformCopyInitialization(
-          InitializedEntity::InitializeParameter(Context,
+      ExprResult R = S.PerformCopyInitialization(
+          InitializedEntity::InitializeParameter(S.Context,
                                                  Function->getParamDecl(i)),
          SourceLocation(), P->hasUninstantiatedDefaultArg() ?
                            P->getUninstantiatedDefaultArg() :
                            P->getDefaultArg());
-      if (R.isInvalid()) {
-        InitializationFailed = true;
-        break;
-      }
+      if (R.isInvalid())
+        return false;
       ConvertedArgs.push_back(R.get());
     }
 
-    if (InitializationFailed || Trap.hasErrorOccurred())
-      return EnableIfAttrs[0];
+    if (Trap.hasErrorOccurred())
+      return false;
   }
+  return true;
+}
+
+EnableIfAttr *Sema::CheckEnableIf(FunctionDecl *Function, ArrayRef<Expr *> Args,
+                                  bool MissingImplicitThis) {
+  SmallVector<EnableIfAttr *, 4> EnableIfAttrs =
+      getOrderedEnableIfAttrs(Function);
+  if (EnableIfAttrs.empty())
+    return nullptr;
+
+  SFINAETrap Trap(*this);
+  SmallVector<Expr *, 16> ConvertedArgs;
+  // FIXME: We should look into making enable_if late-parsed.
+  Expr *DiscardedThis;
+  if (!convertArgsForAvailabilityChecks(
+          *this, Function, /*ThisArg=*/nullptr, Args, Trap,
+          /*MissingImplicitThis=*/true, DiscardedThis, ConvertedArgs))
+    return EnableIfAttrs[0];
 
   for (auto *EIA : EnableIfAttrs) {
     APValue Result;
@@ -6136,6 +6227,75 @@ EnableIfAttr *Sema::CheckEnableIf(FunctionDecl *Function, ArrayRef<Expr *> Args,
   return nullptr;
 }
 
+template <typename CheckFn>
+static bool diagnoseDiagnoseIfAttrsWith(Sema &S, const FunctionDecl *FD,
+                                        bool ArgDependent, SourceLocation Loc,
+                                        CheckFn &&IsSuccessful) {
+  SmallVector<const DiagnoseIfAttr *, 8> Attrs;
+  for (const auto *DIA : FD->specific_attrs<DiagnoseIfAttr>()) {
+    if (ArgDependent == DIA->getArgDependent())
+      Attrs.push_back(DIA);
+  }
+
+  // Common case: No diagnose_if attributes, so we can quit early.
+  if (Attrs.empty())
+    return false;
+
+  auto WarningBegin = std::stable_partition(
+      Attrs.begin(), Attrs.end(),
+      [](const DiagnoseIfAttr *DIA) { return DIA->isError(); });
+
+  // Note that diagnose_if attributes are late-parsed, so they appear in the
+  // correct order (unlike enable_if attributes).
+  auto ErrAttr = llvm::find_if(llvm::make_range(Attrs.begin(), WarningBegin),
+                               IsSuccessful);
+  if (ErrAttr != WarningBegin) {
+    const DiagnoseIfAttr *DIA = *ErrAttr;
+    S.Diag(Loc, diag::err_diagnose_if_succeeded) << DIA->getMessage();
+    S.Diag(DIA->getLocation(), diag::note_from_diagnose_if)
+        << DIA->getParent() << DIA->getCond()->getSourceRange();
+    return true;
+  }
+
+  for (const auto *DIA : llvm::make_range(WarningBegin, Attrs.end()))
+    if (IsSuccessful(DIA)) {
+      S.Diag(Loc, diag::warn_diagnose_if_succeeded) << DIA->getMessage();
+      S.Diag(DIA->getLocation(), diag::note_from_diagnose_if)
+          << DIA->getParent() << DIA->getCond()->getSourceRange();
+    }
+
+  return false;
+}
+
+bool Sema::diagnoseArgDependentDiagnoseIfAttrs(const FunctionDecl *Function,
+                                               const Expr *ThisArg,
+                                               ArrayRef<const Expr *> Args,
+                                               SourceLocation Loc) {
+  return diagnoseDiagnoseIfAttrsWith(
+      *this, Function, /*ArgDependent=*/true, Loc,
+      [&](const DiagnoseIfAttr *DIA) {
+        APValue Result;
+        // It's sane to use the same Args for any redecl of this function, since
+        // EvaluateWithSubstitution only cares about the position of each
+        // argument in the arg list, not the ParmVarDecl* it maps to.
+        if (!DIA->getCond()->EvaluateWithSubstitution(
+                Result, Context, DIA->getParent(), Args, ThisArg))
+          return false;
+        return Result.isInt() && Result.getInt().getBoolValue();
+      });
+}
+
+bool Sema::diagnoseArgIndependentDiagnoseIfAttrs(const FunctionDecl *Function,
+                                                 SourceLocation Loc) {
+  return diagnoseDiagnoseIfAttrsWith(
+      *this, Function, /*ArgDependent=*/false, Loc,
+      [&](const DiagnoseIfAttr *DIA) {
+        bool Result;
+        return DIA->getCond()->EvaluateAsBooleanCondition(Result, Context) &&
+               Result;
+      });
+}
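As a usage sketch of the attribute these helpers implement (diagnostic wording illustrative, not part of this patch):

    // Arg-dependent diagnose_if conditions are evaluated against the actual
    // call arguments; errors are reported before warnings, as partitioned above.
    int safe_div(int a, int b)
        __attribute__((diagnose_if(b == 0, "division by zero", "error")));

    int test() {
      return safe_div(10, 0); // error: division by zero
    }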
 /// \brief Add all of the function declarations in the given function set to
 /// the overload candidate set.
 void Sema::AddFunctionCandidates(const UnresolvedSetImpl &Fns,
@@ -6151,8 +6311,8 @@ void Sema::AddFunctionCandidates(const UnresolvedSetImpl &Fns,
         AddMethodCandidate(cast<CXXMethodDecl>(FD), F.getPair(),
                            cast<CXXMethodDecl>(FD)->getParent(),
                            Args[0]->getType(), Args[0]->Classify(Context),
-                           Args.slice(1), CandidateSet,
-                           SuppressUserConversions, PartialOverloading);
+                           Args.slice(1), CandidateSet, SuppressUserConversions,
+                           PartialOverloading);
       else
         AddOverloadCandidate(FD, F.getPair(), Args, CandidateSet,
                              SuppressUserConversions, PartialOverloading);
@@ -6160,13 +6320,12 @@ void Sema::AddFunctionCandidates(const UnresolvedSetImpl &Fns,
       FunctionTemplateDecl *FunTmpl = cast<FunctionTemplateDecl>(D);
       if (isa<CXXMethodDecl>(FunTmpl->getTemplatedDecl()) &&
           !cast<CXXMethodDecl>(FunTmpl->getTemplatedDecl())->isStatic())
-        AddMethodTemplateCandidate(FunTmpl, F.getPair(),
-                                   cast<CXXRecordDecl>(FunTmpl->getDeclContext()),
-                                   ExplicitTemplateArgs,
-                                   Args[0]->getType(),
-                                   Args[0]->Classify(Context), Args.slice(1),
-                                   CandidateSet, SuppressUserConversions,
-                                   PartialOverloading);
+        AddMethodTemplateCandidate(
+            FunTmpl, F.getPair(),
+            cast<CXXRecordDecl>(FunTmpl->getDeclContext()),
+            ExplicitTemplateArgs, Args[0]->getType(),
+            Args[0]->Classify(Context), Args.slice(1), CandidateSet,
+            SuppressUserConversions, PartialOverloading);
       else
         AddTemplateOverloadCandidate(FunTmpl, F.getPair(),
                                      ExplicitTemplateArgs, Args,
@@ -6194,15 +6353,13 @@ void Sema::AddMethodCandidate(DeclAccessPair FoundDecl,
     assert(isa<CXXMethodDecl>(TD->getTemplatedDecl()) &&
            "Expected a member function template");
     AddMethodTemplateCandidate(TD, FoundDecl, ActingContext,
-                               /*ExplicitArgs*/ nullptr,
-                               ObjectType, ObjectClassification,
-                               Args, CandidateSet,
+                               /*ExplicitArgs*/ nullptr, ObjectType,
+                               ObjectClassification, Args, CandidateSet,
                                SuppressUserConversions);
   } else {
     AddMethodCandidate(cast<CXXMethodDecl>(Decl), FoundDecl, ActingContext,
-                       ObjectType, ObjectClassification,
-                       Args,
-                       CandidateSet, SuppressUserConversions);
+                       ObjectType, ObjectClassification, Args, CandidateSet,
+                       SuppressUserConversions);
   }
 }
 
@@ -6220,7 +6377,8 @@ Sema::AddMethodCandidate(CXXMethodDecl *Method, DeclAccessPair FoundDecl,
                          ArrayRef<Expr *> Args,
                          OverloadCandidateSet &CandidateSet,
                          bool SuppressUserConversions,
-                         bool PartialOverloading) {
+                         bool PartialOverloading,
+                         ConversionSequenceList EarlyConversions) {
   const FunctionProtoType *Proto
     = dyn_cast<FunctionProtoType>(Method->getType()->getAs<FunctionType>());
   assert(Proto && "Methods without a prototype cannot be overloaded");
@@ -6241,7 +6399,8 @@ Sema::AddMethodCandidate(CXXMethodDecl *Method, DeclAccessPair FoundDecl,
   EnterExpressionEvaluationContext Unevaluated(*this, Sema::Unevaluated);
 
   // Add this candidate
-  OverloadCandidate &Candidate = CandidateSet.addCandidate(Args.size() + 1);
+  OverloadCandidate &Candidate =
+      CandidateSet.addCandidate(Args.size() + 1, EarlyConversions);
   Candidate.FoundDecl = FoundDecl;
   Candidate.Function = Method;
   Candidate.IsSurrogate = false;
@@ -6303,7 +6462,10 @@ Sema::AddMethodCandidate(CXXMethodDecl *Method, DeclAccessPair FoundDecl,
   // Determine the implicit conversion sequences for each of the
   // arguments.
   for (unsigned ArgIdx = 0; ArgIdx < Args.size(); ++ArgIdx) {
-    if (ArgIdx < NumParams) {
+    if (Candidate.Conversions[ArgIdx + 1].isInitialized()) {
+      // We already formed a conversion sequence for this parameter during
+      // template argument deduction.
+    } else if (ArgIdx < NumParams) {
       // (C++ 13.3.2p3): for F to be a viable function, there shall
       // exist for each argument an implicit conversion sequence
       // (13.3.3.1) that converts that argument to the corresponding
@@ -6364,19 +6526,32 @@ Sema::AddMethodTemplateCandidate(FunctionTemplateDecl *MethodTmpl,
   //   functions.
   TemplateDeductionInfo Info(CandidateSet.getLocation());
   FunctionDecl *Specialization = nullptr;
-  if (TemplateDeductionResult Result
-        = DeduceTemplateArguments(MethodTmpl, ExplicitTemplateArgs, Args,
-                                  Specialization, Info, PartialOverloading)) {
-    OverloadCandidate &Candidate = CandidateSet.addCandidate();
+  ConversionSequenceList Conversions;
+  if (TemplateDeductionResult Result = DeduceTemplateArguments(
+          MethodTmpl, ExplicitTemplateArgs, Args, Specialization, Info,
+          PartialOverloading, [&](ArrayRef<QualType> ParamTypes) {
+            return CheckNonDependentConversions(
+                MethodTmpl, ParamTypes, Args, CandidateSet, Conversions,
+                SuppressUserConversions, ActingContext, ObjectType,
+                ObjectClassification);
+          })) {
+    OverloadCandidate &Candidate =
+        CandidateSet.addCandidate(Conversions.size(), Conversions);
     Candidate.FoundDecl = FoundDecl;
     Candidate.Function = MethodTmpl->getTemplatedDecl();
     Candidate.Viable = false;
-    Candidate.FailureKind = ovl_fail_bad_deduction;
     Candidate.IsSurrogate = false;
-    Candidate.IgnoreObjectArgument = false;
+    Candidate.IgnoreObjectArgument =
+        cast<CXXMethodDecl>(Candidate.Function)->isStatic() ||
+        ObjectType.isNull();
     Candidate.ExplicitCallArguments = Args.size();
-    Candidate.DeductionFailure = MakeDeductionFailureInfo(Context, Result,
-                                                          Info);
+    if (Result == TDK_NonDependentConversionFailure)
+      Candidate.FailureKind = ovl_fail_bad_conversion;
+    else {
+      Candidate.FailureKind = ovl_fail_bad_deduction;
+      Candidate.DeductionFailure = MakeDeductionFailureInfo(Context, Result,
+                                                            Info);
+    }
     return;
   }
 
@@ -6387,7 +6562,8 @@ Sema::AddMethodTemplateCandidate(FunctionTemplateDecl *MethodTmpl,
          "Specialization is not a member function?");
   AddMethodCandidate(cast<CXXMethodDecl>(Specialization), FoundDecl,
                      ActingContext, ObjectType, ObjectClassification, Args,
-                     CandidateSet, SuppressUserConversions, PartialOverloading);
+                     CandidateSet, SuppressUserConversions, PartialOverloading,
+                     Conversions);
 }
 
 /// \brief Add a C++ function template specialization as a candidate
@@ -6415,19 +6591,33 @@ Sema::AddTemplateOverloadCandidate(FunctionTemplateDecl *FunctionTemplate,
   //   functions.
   TemplateDeductionInfo Info(CandidateSet.getLocation());
   FunctionDecl *Specialization = nullptr;
-  if (TemplateDeductionResult Result
-        = DeduceTemplateArguments(FunctionTemplate, ExplicitTemplateArgs, Args,
-                                  Specialization, Info, PartialOverloading)) {
-    OverloadCandidate &Candidate = CandidateSet.addCandidate();
+  ConversionSequenceList Conversions;
+  if (TemplateDeductionResult Result = DeduceTemplateArguments(
+          FunctionTemplate, ExplicitTemplateArgs, Args, Specialization, Info,
+          PartialOverloading, [&](ArrayRef<QualType> ParamTypes) {
+            return CheckNonDependentConversions(FunctionTemplate, ParamTypes,
                                                Args, CandidateSet, Conversions,
+                                                SuppressUserConversions);
+          })) {
+    OverloadCandidate &Candidate =
+        CandidateSet.addCandidate(Conversions.size(), Conversions);
     Candidate.FoundDecl = FoundDecl;
     Candidate.Function = FunctionTemplate->getTemplatedDecl();
     Candidate.Viable = false;
-    Candidate.FailureKind = ovl_fail_bad_deduction;
     Candidate.IsSurrogate = false;
-    Candidate.IgnoreObjectArgument = false;
+    // Ignore the object argument if there is one, since we don't have an object
+    // type.
+    Candidate.IgnoreObjectArgument =
+        isa<CXXMethodDecl>(Candidate.Function) &&
+        !isa<CXXConstructorDecl>(Candidate.Function);
     Candidate.ExplicitCallArguments = Args.size();
-    Candidate.DeductionFailure = MakeDeductionFailureInfo(Context, Result,
-                                                          Info);
+    if (Result == TDK_NonDependentConversionFailure)
+      Candidate.FailureKind = ovl_fail_bad_conversion;
+    else {
+      Candidate.FailureKind = ovl_fail_bad_deduction;
+      Candidate.DeductionFailure = MakeDeductionFailureInfo(Context, Result,
+                                                            Info);
+    }
     return;
   }
 
@@ -6435,7 +6625,64 @@ Sema::AddTemplateOverloadCandidate(FunctionTemplateDecl *FunctionTemplate,
   //   deduction as a candidate.
   assert(Specialization && "Missing function template specialization?");
   AddOverloadCandidate(Specialization, FoundDecl, Args, CandidateSet,
-                       SuppressUserConversions, PartialOverloading);
+                       SuppressUserConversions, PartialOverloading,
+                       /*AllowExplicit*/false, Conversions);
+}
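A sketch of the DR1391 behavior this enables (names hypothetical): conversions for parameters with non-dependent types are now checked during deduction, so a candidate with an impossible conversion becomes non-viable instead of failing later:

    struct Private { explicit Private(int); };   // no implicit conversion from int
    template <typename T> void f(T, Private);    // #1: second parameter is non-dependent
    void f(int, double);                         // #2
    void g() { f(1, 2.0); }                      // calls #2; #1 is marked non-viable early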
+
+/// Check that implicit conversion sequences can be formed for each argument
+/// whose corresponding parameter has a non-dependent type, per DR1391's
+/// [temp.deduct.call]p10.
+bool Sema::CheckNonDependentConversions(
+    FunctionTemplateDecl *FunctionTemplate, ArrayRef<QualType> ParamTypes,
+    ArrayRef<Expr *> Args, OverloadCandidateSet &CandidateSet,
+    ConversionSequenceList &Conversions, bool SuppressUserConversions,
+    CXXRecordDecl *ActingContext, QualType ObjectType,
+    Expr::Classification ObjectClassification) {
+  // FIXME: The cases in which we allow explicit conversions for constructor
+  // arguments never consider calling a constructor template. It's not clear
+  // that is correct.
+  const bool AllowExplicit = false;
+
+  auto *FD = FunctionTemplate->getTemplatedDecl();
+  auto *Method = dyn_cast<CXXMethodDecl>(FD);
+  bool HasThisConversion = Method && !isa<CXXConstructorDecl>(Method);
+  unsigned ThisConversions = HasThisConversion ? 1 : 0;
+
+  Conversions =
+      CandidateSet.allocateConversionSequences(ThisConversions + Args.size());
+
+  // Overload resolution is always an unevaluated context.
+  EnterExpressionEvaluationContext Unevaluated(*this, Sema::Unevaluated);
+
+  // For a method call, check the 'this' conversion here too. DR1391 doesn't
+  // require that, but this check should never result in a hard error, and
+  // overload resolution is permitted to sidestep instantiations.
+  if (HasThisConversion && !cast<CXXMethodDecl>(FD)->isStatic() &&
+      !ObjectType.isNull()) {
+    Conversions[0] = TryObjectArgumentInitialization(
+        *this, CandidateSet.getLocation(), ObjectType, ObjectClassification,
+        Method, ActingContext);
+    if (Conversions[0].isBad())
+      return true;
+  }
+
+  for (unsigned I = 0, N = std::min(ParamTypes.size(), Args.size()); I != N;
+       ++I) {
+    QualType ParamType = ParamTypes[I];
+    if (!ParamType->isDependentType()) {
+      Conversions[ThisConversions + I]
+        = TryCopyInitialization(*this, Args[I], ParamType,
+                                SuppressUserConversions,
+                                /*InOverloadResolution=*/true,
+                                /*AllowObjCWritebackConversion=*/
+                                  getLangOpts().ObjCAutoRefCount,
+                                AllowExplicit);
+      if (Conversions[ThisConversions + I].isBad())
+        return true;
+    }
+  }
+
+  return false;
 }
 
 /// Determine whether this is an allowable conversion from the result
@@ -6843,10 +7090,8 @@ void Sema::AddMemberOperatorCandidates(OverloadedOperatorKind Op,
        Oper != OperEnd;
        ++Oper)
     AddMethodCandidate(Oper.getPair(), Args[0]->getType(),
-                       Args[0]->Classify(Context),
-                       Args.slice(1),
-                       CandidateSet,
-                       /* SuppressUserConversions = */ false);
+                       Args[0]->Classify(Context), Args.slice(1),
+                       CandidateSet, /*SuppressUserConversions=*/false);
 }
 
@@ -8679,8 +8924,8 @@ bool clang::isBetterOverloadCandidate(Sema &S, const OverloadCandidate &Cand1,
   // Define functions that don't require ill-formed conversions for a given
   // argument to be better candidates than functions that do.
-  unsigned NumArgs = Cand1.NumConversions;
-  assert(Cand2.NumConversions == NumArgs && "Overload candidate mismatch");
+  unsigned NumArgs = Cand1.Conversions.size();
+  assert(Cand2.Conversions.size() == NumArgs && "Overload candidate mismatch");
   bool HasBetterConversion = false;
   for (unsigned ArgIdx = StartArg; ArgIdx < NumArgs; ++ArgIdx) {
     bool Cand1Bad = IsIllFormedConversion(Cand1.Conversions[ArgIdx]);
@@ -8751,6 +8996,12 @@ bool clang::isBetterOverloadCandidate(Sema &S, const OverloadCandidate &Cand1,
     //   C++14 [over.match.best]p1 section 2 bullet 3.
   }
 
+  //    -- F1 is generated from a deduction-guide and F2 is not
+  auto *Guide1 = dyn_cast_or_null<CXXDeductionGuideDecl>(Cand1.Function);
+  auto *Guide2 = dyn_cast_or_null<CXXDeductionGuideDecl>(Cand2.Function);
+  if (Guide1 && Guide2 && Guide1->isImplicit() != Guide2->isImplicit())
+    return Guide2->isImplicit();
+
   //   -- F1 is a non-template function and F2 is a function template
   //      specialization, or, if not that,
   bool Cand1IsSpecialization = Cand1.Function &&
@@ -8921,9 +9172,7 @@ OverloadCandidateSet::BestViableFunction(Sema &S, SourceLocation Loc,
              S.IdentifyCUDAPreference(Caller, Cand->Function) ==
                  Sema::CFP_WrongSide;
     };
-    Candidates.erase(std::remove_if(Candidates.begin(), Candidates.end(),
-                                    IsWrongSideCandidate),
-                     Candidates.end());
+    llvm::erase_if(Candidates, IsWrongSideCandidate);
   }
 }
 
@@ -9129,7 +9378,7 @@ void Sema::NoteOverloadCandidate(NamedDecl *Found, FunctionDecl *Fn,
   std::string FnDesc;
   OverloadCandidateKind K = ClassifyOverloadCandidate(*this, Found, Fn, FnDesc);
   PartialDiagnostic PD = PDiag(diag::note_ovl_candidate)
-                             << (unsigned) K << FnDesc;
+                             << (unsigned) K << Fn << FnDesc;
 
   HandleFunctionTypeMismatch(PD, Fn->getType(), DestType);
   Diag(Fn->getLocation(), PD);
@@ -9562,9 +9811,25 @@ static void DiagnoseBadDeduction(Sema &S, NamedDecl *Found, Decl *Templated,
     int which = 0;
     if (isa<TemplateTypeParmDecl>(ParamD))
       which = 0;
-    else if (isa<NonTypeTemplateParmDecl>(ParamD))
+    else if (isa<NonTypeTemplateParmDecl>(ParamD)) {
+      // Deduction might have failed because we deduced arguments of two
+      // different types for a non-type template parameter.
+      // FIXME: Use a different TDK value for this.
+      QualType T1 =
+          DeductionFailure.getFirstArg()->getNonTypeTemplateArgumentType();
+      QualType T2 =
+          DeductionFailure.getSecondArg()->getNonTypeTemplateArgumentType();
+      if (!S.Context.hasSameType(T1, T2)) {
+        S.Diag(Templated->getLocation(),
+               diag::note_ovl_candidate_inconsistent_deduction_types)
+            << ParamD->getDeclName() << *DeductionFailure.getFirstArg() << T1
+            << *DeductionFailure.getSecondArg() << T2;
+        MaybeEmitInheritedConstructorNote(S, Found);
+        return;
+      }
+
       which = 1;
-    else {
+    } else {
       which = 2;
     }
 
@@ -9648,15 +9913,8 @@ static void DiagnoseBadDeduction(Sema &S, NamedDecl *Found, Decl *Templated,
     return;
   }
 
-  case Sema::TDK_FailedOverloadResolution: {
-    OverloadExpr::FindResult R = OverloadExpr::find(DeductionFailure.getExpr());
-    S.Diag(Templated->getLocation(),
-           diag::note_ovl_candidate_failed_overload_resolution)
-        << R.Expression->getName();
-    return;
-  }
-
-  case Sema::TDK_DeducedMismatch: {
+  case Sema::TDK_DeducedMismatch:
+  case Sema::TDK_DeducedMismatchNested: {
     // Format the template argument list into the argument string.
     SmallString<128> TemplateArgString;
     if (TemplateArgumentList *Args =
@@ -9669,7 +9927,8 @@ static void DiagnoseBadDeduction(Sema &S, NamedDecl *Found, Decl *Templated,
     S.Diag(Templated->getLocation(), diag::note_ovl_candidate_deduced_mismatch)
         << (*DeductionFailure.getCallArgIndex() + 1)
         << *DeductionFailure.getFirstArg() << *DeductionFailure.getSecondArg()
-        << TemplateArgString;
+        << TemplateArgString
+        << (DeductionFailure.Result == Sema::TDK_DeducedMismatchNested);
     break;
   }
 
@@ -9799,10 +10058,17 @@ static void DiagnoseFailedEnableIfAttr(Sema &S, OverloadCandidate *Cand) {
   EnableIfAttr *Attr = static_cast<EnableIfAttr *>(Cand->DeductionFailure.Data);
 
   S.Diag(Callee->getLocation(),
-         diag::note_ovl_candidate_disabled_by_enable_if_attr)
+         diag::note_ovl_candidate_disabled_by_function_cond_attr)
       << Attr->getCond()->getSourceRange() << Attr->getMessage();
 }
 
+static void DiagnoseOpenCLExtensionDisabled(Sema &S, OverloadCandidate *Cand) {
+  FunctionDecl *Callee = Cand->Function;
+
+  S.Diag(Callee->getLocation(),
+         diag::note_ovl_candidate_disabled_by_extension);
+}
+
 /// Generates a 'note' diagnostic for an overload candidate.  We've
 /// already generated a primary error at the call site.
 ///
@@ -9822,21 +10088,20 @@ static void NoteFunctionCandidate(Sema &S, OverloadCandidate *Cand,
   FunctionDecl *Fn = Cand->Function;
 
   // Note deleted candidates, but only if they're viable.
-  if (Cand->Viable && (Fn->isDeleted() ||
-      S.isFunctionConsideredUnavailable(Fn))) {
-    std::string FnDesc;
-    OverloadCandidateKind FnKind =
+  if (Cand->Viable) {
+    if (Fn->isDeleted() || S.isFunctionConsideredUnavailable(Fn)) {
+      std::string FnDesc;
+      OverloadCandidateKind FnKind =
         ClassifyOverloadCandidate(S, Cand->FoundDecl, Fn, FnDesc);
 
-    S.Diag(Fn->getLocation(), diag::note_ovl_candidate_deleted)
-      << FnKind << FnDesc
-      << (Fn->isDeleted() ? (Fn->isDeletedAsWritten() ? 1 : 2) : 0);
-    MaybeEmitInheritedConstructorNote(S, Cand->FoundDecl);
-    return;
-  }
+      S.Diag(Fn->getLocation(), diag::note_ovl_candidate_deleted)
+        << FnKind << FnDesc
+        << (Fn->isDeleted() ? (Fn->isDeletedAsWritten() ? 1 : 2) : 0);
+      MaybeEmitInheritedConstructorNote(S, Cand->FoundDecl);
+      return;
+    }
 
-  // We don't really have anything else to say about viable candidates.
-  if (Cand->Viable) {
+    // We don't really have anything else to say about viable candidates.
     S.NoteOverloadCandidate(Cand->FoundDecl, Fn);
     return;
   }
@@ -9864,7 +10129,7 @@ static void NoteFunctionCandidate(Sema &S, OverloadCandidate *Cand,
 
   case ovl_fail_bad_conversion: {
     unsigned I = (Cand->IgnoreObjectArgument ? 1 : 0);
-    for (unsigned N = Cand->NumConversions; I != N; ++I)
+    for (unsigned N = Cand->Conversions.size(); I != N; ++I)
       if (Cand->Conversions[I].isBad())
        return DiagnoseBadConversion(S, Cand, I, TakingCandidateAddress);
 
@@ -9880,6 +10145,20 @@ static void NoteFunctionCandidate(Sema &S, OverloadCandidate *Cand,
   case ovl_fail_enable_if:
     return DiagnoseFailedEnableIfAttr(S, Cand);
 
+  case ovl_fail_ext_disabled:
+    return DiagnoseOpenCLExtensionDisabled(S, Cand);
+
+  case ovl_fail_inhctor_slice:
+    // It's generally not interesting to note copy/move constructors here.
+    if (cast<CXXConstructorDecl>(Fn)->isCopyOrMoveConstructor())
+      return;
+    S.Diag(Fn->getLocation(),
+           diag::note_ovl_candidate_inherited_constructor_slice)
+        << (Fn->getPrimaryTemplate() ? 1 : 0)
+        << Fn->getParamDecl(0)->getType()->isRValueReferenceType();
+    MaybeEmitInheritedConstructorNote(S, Cand->FoundDecl);
+    return;
+
   case ovl_fail_addr_not_available: {
     bool Available = checkAddressOfCandidateIsAvailable(S, Cand->Function);
     (void)Available;
@@ -9924,12 +10203,12 @@ static void NoteSurrogateCandidate(Sema &S, OverloadCandidate *Cand) {
 static void NoteBuiltinOperatorCandidate(Sema &S, StringRef Opc,
                                          SourceLocation OpLoc,
                                          OverloadCandidate *Cand) {
-  assert(Cand->NumConversions <= 2 && "builtin operator is not binary");
+  assert(Cand->Conversions.size() <= 2 && "builtin operator is not binary");
   std::string TypeStr("operator");
   TypeStr += Opc;
   TypeStr += "(";
   TypeStr += Cand->BuiltinTypes.ParamTypes[0].getAsString();
-  if (Cand->NumConversions == 1) {
+  if (Cand->Conversions.size() == 1) {
     TypeStr += ")";
     S.Diag(OpLoc, diag::note_ovl_builtin_unary_candidate) << TypeStr;
   } else {
@@ -9942,9 +10221,7 @@ static void NoteBuiltinOperatorCandidate(Sema &S, StringRef Opc,
 
 static void NoteAmbiguousUserConversions(Sema &S, SourceLocation OpLoc,
                                          OverloadCandidate *Cand) {
-  unsigned NoOperands = Cand->NumConversions;
-  for (unsigned ArgIdx = 0; ArgIdx < NoOperands; ++ArgIdx) {
-    const ImplicitConversionSequence &ICS = Cand->Conversions[ArgIdx];
+  for (const ImplicitConversionSequence &ICS : Cand->Conversions) {
     if (ICS.isBad()) break; // all meaningless after first invalid
     if (!ICS.isAmbiguous()) continue;
 
@@ -9964,7 +10241,8 @@ static SourceLocation GetLocationForCandidate(const OverloadCandidate *Cand) {
 static unsigned RankDeductionFailure(const DeductionFailureInfo &DFI) {
   switch ((Sema::TemplateDeductionResult)DFI.Result) {
   case Sema::TDK_Success:
-    llvm_unreachable("TDK_success while diagnosing bad deduction");
+  case Sema::TDK_NonDependentConversionFailure:
+    llvm_unreachable("non-deduction failure while diagnosing bad deduction");
 
   case Sema::TDK_Invalid:
   case Sema::TDK_Incomplete:
@@ -9976,13 +10254,13 @@ static unsigned RankDeductionFailure(const DeductionFailureInfo &DFI) {
 
   case Sema::TDK_SubstitutionFailure:
   case Sema::TDK_DeducedMismatch:
+  case Sema::TDK_DeducedMismatchNested:
   case Sema::TDK_NonDeducedMismatch:
   case Sema::TDK_MiscellaneousDeductionFailure:
   case Sema::TDK_CUDATargetMismatch:
     return 3;
 
   case Sema::TDK_InstantiationDepth:
-  case Sema::TDK_FailedOverloadResolution:
     return 4;
 
   case Sema::TDK_InvalidExplicitArguments:
@@ -10067,11 +10345,11 @@ struct CompareOverloadCandidatesForDisplay {
 
       // If there's any ordering between the defined conversions...
       // FIXME: this might not be transitive.
-      assert(L->NumConversions == R->NumConversions);
+      assert(L->Conversions.size() == R->Conversions.size());
 
       int leftBetter = 0;
       unsigned I = (L->IgnoreObjectArgument || R->IgnoreObjectArgument);
-      for (unsigned E = L->NumConversions; I != E; ++I) {
+      for (unsigned E = L->Conversions.size(); I != E; ++I) {
         switch (CompareImplicitConversionSequences(S, Loc,
                                                    L->Conversions[I],
                                                    R->Conversions[I])) {
@@ -10120,7 +10398,8 @@ struct CompareOverloadCandidatesForDisplay {
 }
 
 /// CompleteNonViableCandidate - Normally, overload resolution only
-/// computes up to the first. Produces the FixIt set if possible.
+/// computes up to the first bad conversion. Produces the FixIt set if
+/// possible.
 static void CompleteNonViableCandidate(Sema &S, OverloadCandidate *Cand,
                                        ArrayRef<Expr *> Args) {
   assert(!Cand->Viable);
@@ -10133,71 +10412,67 @@ static void CompleteNonViableCandidate(Sema &S, OverloadCandidate *Cand,
   // Use a implicit copy initialization to check conversion fixes.
   Cand->Fix.setConversionChecker(TryCopyInitialization);
 
-  // Skip forward to the first bad conversion.
-  unsigned ConvIdx = (Cand->IgnoreObjectArgument ? 1 : 0);
-  unsigned ConvCount = Cand->NumConversions;
-  while (true) {
+  // Attempt to fix the bad conversion.
+  unsigned ConvCount = Cand->Conversions.size();
+  for (unsigned ConvIdx = (Cand->IgnoreObjectArgument ? 1 : 0); /**/;
+       ++ConvIdx) {
     assert(ConvIdx != ConvCount && "no bad conversion in candidate");
-    ConvIdx++;
-    if (Cand->Conversions[ConvIdx - 1].isBad()) {
-      Unfixable = !Cand->TryToFixBadConversion(ConvIdx - 1, S);
+    if (Cand->Conversions[ConvIdx].isInitialized() &&
+        Cand->Conversions[ConvIdx].isBad()) {
+      Unfixable = !Cand->TryToFixBadConversion(ConvIdx, S);
       break;
     }
   }
 
-  if (ConvIdx == ConvCount)
-    return;
-
-  assert(!Cand->Conversions[ConvIdx].isInitialized() &&
-         "remaining conversion is initialized?");
-
   // FIXME: this should probably be preserved from the overload
   // operation somehow.
   bool SuppressUserConversions = false;
 
-  const FunctionProtoType* Proto;
-  unsigned ArgIdx = ConvIdx;
+  unsigned ConvIdx = 0;
+  ArrayRef<QualType> ParamTypes;
 
   if (Cand->IsSurrogate) {
     QualType ConvType
      = Cand->Surrogate->getConversionType().getNonReferenceType();
     if (const PointerType *ConvPtrType = ConvType->getAs<PointerType>())
       ConvType = ConvPtrType->getPointeeType();
-    Proto = ConvType->getAs<FunctionProtoType>();
-    ArgIdx--;
+    ParamTypes = ConvType->getAs<FunctionProtoType>()->getParamTypes();
+    // Conversion 0 is 'this', which doesn't have a corresponding argument.
+    ConvIdx = 1;
  } else if (Cand->Function) {
-    Proto = Cand->Function->getType()->getAs<FunctionProtoType>();
+    ParamTypes =
+        Cand->Function->getType()->getAs<FunctionProtoType>()->getParamTypes();
    if (isa<CXXMethodDecl>(Cand->Function) &&
-        !isa<CXXConstructorDecl>(Cand->Function))
-      ArgIdx--;
+        !isa<CXXConstructorDecl>(Cand->Function)) {
+      // Conversion 0 is 'this', which doesn't have a corresponding argument.
+      ConvIdx = 1;
+    }
   } else {
-    // Builtin binary operator with a bad first conversion.
+    // Builtin operator.
     assert(ConvCount <= 3);
-    for (; ConvIdx != ConvCount; ++ConvIdx)
-      Cand->Conversions[ConvIdx]
-        = TryCopyInitialization(S, Args[ConvIdx],
-                                Cand->BuiltinTypes.ParamTypes[ConvIdx],
-                                SuppressUserConversions,
-                                /*InOverloadResolution*/ true,
-                                /*AllowObjCWritebackConversion=*/
-                                  S.getLangOpts().ObjCAutoRefCount);
-    return;
+    ParamTypes = Cand->BuiltinTypes.ParamTypes;
   }
 
   // Fill in the rest of the conversions.
-  unsigned NumParams = Proto->getNumParams();
-  for (; ConvIdx != ConvCount; ++ConvIdx, ++ArgIdx) {
-    if (ArgIdx < NumParams) {
-      Cand->Conversions[ConvIdx] = TryCopyInitialization(
-          S, Args[ArgIdx], Proto->getParamType(ArgIdx), SuppressUserConversions,
-          /*InOverloadResolution=*/true,
-          /*AllowObjCWritebackConversion=*/
-          S.getLangOpts().ObjCAutoRefCount);
-      // Store the FixIt in the candidate if it exists.
-      if (!Unfixable && Cand->Conversions[ConvIdx].isBad())
-        Unfixable = !Cand->TryToFixBadConversion(ConvIdx, S);
-    }
-    else
+  for (unsigned ArgIdx = 0; ConvIdx != ConvCount; ++ConvIdx, ++ArgIdx) {
+    if (Cand->Conversions[ConvIdx].isInitialized()) {
+      // We've already checked this conversion.
+    } else if (ArgIdx < ParamTypes.size()) {
+      if (ParamTypes[ArgIdx]->isDependentType())
+        Cand->Conversions[ConvIdx].setAsIdentityConversion(
+            Args[ArgIdx]->getType());
+      else {
+        Cand->Conversions[ConvIdx] =
+            TryCopyInitialization(S, Args[ArgIdx], ParamTypes[ArgIdx],
+                                  SuppressUserConversions,
+                                  /*InOverloadResolution=*/true,
+                                  /*AllowObjCWritebackConversion=*/
+                                  S.getLangOpts().ObjCAutoRefCount);
+        // Store the FixIt in the candidate if it exists.
+        if (!Unfixable && Cand->Conversions[ConvIdx].isBad())
+          Unfixable = !Cand->TryToFixBadConversion(ConvIdx, S);
+      }
+    } else
       Cand->Conversions[ConvIdx].setEllipsis();
   }
 }
 
@@ -11221,7 +11496,7 @@ DiagnoseTwoPhaseLookup(Sema &SemaRef, SourceLocation FnLoc,
                        TemplateArgumentListInfo *ExplicitTemplateArgs,
                        ArrayRef<Expr *> Args,
                        bool *DoDiagnoseEmptyLookup = nullptr) {
-  if (SemaRef.ActiveTemplateInstantiations.empty() || !SS.isEmpty())
+  if (!SemaRef.inTemplateInstantiation() || !SS.isEmpty())
     return false;
 
   for (DeclContext *DC = SemaRef.CurContext; DC; DC = DC->getParent()) {
@@ -11402,6 +11677,12 @@ BuildRecoveryCallExpr(Sema &SemaRef, Scope *S, Expr *Fn,
 
   assert(!R.empty() && "lookup results empty despite recovery");
 
+  // If recovery created an ambiguity, just bail out.
+  if (R.isAmbiguous()) {
+    R.suppressDiagnostics();
+    return ExprError();
+  }
+
   // Build an implicit member call if appropriate.  Just drop the
   // casts and such from the call, we don't really care.
   ExprResult NewFn = ExprError();
@@ -11762,6 +12043,10 @@ Sema::CreateOverloadedUnaryOp(SourceLocation OpLoc, UnaryOperatorKind Opc,
     if (CheckCallReturnType(FnDecl->getReturnType(), OpLoc, TheCall, FnDecl))
       return ExprError();
 
+    if (CheckFunctionCall(FnDecl, TheCall,
+                          FnDecl->getType()->castAs<FunctionProtoType>()))
+      return ExprError();
+
     return MaybeBindToTemporary(TheCall);
   } else {
     // We matched a built-in operator. Convert the arguments, then
@@ -11992,16 +12277,20 @@ Sema::CreateOverloadedBinOp(SourceLocation OpLoc,
           return ExprError();
 
         ArrayRef<const Expr *> ArgsArray(Args, 2);
+        const Expr *ImplicitThis = nullptr;
         // Cut off the implicit 'this'.
-        if (isa<CXXMethodDecl>(FnDecl))
+        if (isa<CXXMethodDecl>(FnDecl)) {
+          ImplicitThis = ArgsArray[0];
           ArgsArray = ArgsArray.slice(1);
+        }
 
         // Check for a self move.
         if (Op == OO_Equal)
           DiagnoseSelfMove(Args[0], Args[1], OpLoc);
 
-        checkCall(FnDecl, nullptr, ArgsArray, isa<CXXMethodDecl>(FnDecl), OpLoc,
-                  TheCall->getSourceRange(), VariadicDoesNotApply);
+        checkCall(FnDecl, nullptr, ImplicitThis, ArgsArray,
+                  isa<CXXMethodDecl>(FnDecl), OpLoc, TheCall->getSourceRange(),
+                  VariadicDoesNotApply);
 
         return MaybeBindToTemporary(TheCall);
       } else {
@@ -12210,6 +12499,10 @@ Sema::CreateOverloadedArraySubscriptExpr(SourceLocation LLoc,
         if (CheckCallReturnType(FnDecl->getReturnType(), LLoc, TheCall, FnDecl))
           return ExprError();
 
+        if (CheckFunctionCall(Method, TheCall,
+                              Method->getType()->castAs<FunctionProtoType>()))
+          return ExprError();
+
         return MaybeBindToTemporary(TheCall);
       } else {
         // We matched a built-in operator. Convert the arguments, then
@@ -12399,11 +12692,10 @@ Sema::BuildCallToMemberFunction(Scope *S, Expr *MemExprE,
                            ObjectClassification, Args, CandidateSet,
                            /*SuppressUserConversions=*/false);
       } else {
-        AddMethodTemplateCandidate(cast<FunctionTemplateDecl>(Func),
-                                   I.getPair(), ActingDC, TemplateArgs,
-                                   ObjectType, ObjectClassification,
-                                   Args, CandidateSet,
-                                   /*SuppressUsedConversions=*/false);
+        AddMethodTemplateCandidate(
+            cast<FunctionTemplateDecl>(Func), I.getPair(), ActingDC,
+            TemplateArgs, ObjectType, ObjectClassification, Args, CandidateSet,
+            /*SuppressUsedConversions=*/false);
       }
     }
 
@@ -12516,7 +12808,7 @@ Sema::BuildCallToMemberFunction(Scope *S, Expr *MemExprE,
              diag::err_ovl_no_viable_member_function_in_call)
           << Method << Method->getSourceRange();
       Diag(Method->getLocation(),
-           diag::note_ovl_candidate_disabled_by_enable_if_attr)
+           diag::note_ovl_candidate_disabled_by_function_cond_attr)
          << Attr->getCond()->getSourceRange() << Attr->getMessage();
       return ExprError();
     }
@@ -12598,9 +12890,8 @@ Sema::BuildCallToObjectOfClassType(Scope *S, Expr *Obj,
   for (LookupResult::iterator Oper = R.begin(), OperEnd = R.end();
        Oper != OperEnd; ++Oper) {
     AddMethodCandidate(Oper.getPair(), Object.get()->getType(),
-                       Object.get()->Classify(Context),
-                       Args, CandidateSet,
-                       /*SuppressUserConversions=*/ false);
+                       Object.get()->Classify(Context), Args, CandidateSet,
+                       /*SuppressUserConversions=*/false);
   }
 
   // C++ [over.call.object]p2:
@@ -12949,7 +13240,11 @@ Sema::BuildOverloadedArrowExpr(Scope *S, Expr *Base, SourceLocation OpLoc,
                                 Base, ResultTy, VK, OpLoc, false);
 
   if (CheckCallReturnType(Method->getReturnType(), OpLoc, TheCall, Method))
-          return ExprError();
+    return ExprError();
+
+  if (CheckFunctionCall(Method, TheCall,
+                        Method->getType()->castAs<FunctionProtoType>()))
+    return ExprError();
 
   return MaybeBindToTemporary(TheCall);
 }
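With CheckFunctionCall now run for overloaded unary and binary operators, operator[], and operator->, call-site checks fire for operator syntax too; a hypothetical example (assuming the diagnose_if machinery added earlier in this patch):

    struct Buf {
      int &operator[](int i)
          __attribute__((diagnose_if(i < 0, "negative index", "warning")));
    };
    void h(Buf &b) { b[-1] = 0; } // warning: negative index (now diagnosed)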
diff --git a/tools/clang/lib/Sema/SemaTemplateVariadic.cpp b/tools/clang/lib/Sema/SemaTemplateVariadic.cpp
index d83d1d3..4d758a1 100644
--- a/tools/clang/lib/Sema/SemaTemplateVariadic.cpp
+++ b/tools/clang/lib/Sema/SemaTemplateVariadic.cpp
@@ -321,6 +321,7 @@ bool Sema::DiagnoseUnexpandedParameterPack(const DeclarationNameInfo &NameInfo,
   case DeclarationName::CXXOperatorName:
   case DeclarationName::CXXLiteralOperatorName:
   case DeclarationName::CXXUsingDirective:
+  case DeclarationName::CXXDeductionGuideName:
     return false;
 
   case DeclarationName::CXXConstructorName:
@@ -390,21 +391,18 @@ void Sema::collectUnexpandedParameterPacks(QualType T,
 void Sema::collectUnexpandedParameterPacks(TypeLoc TL,
                   SmallVectorImpl<UnexpandedParameterPack> &Unexpanded) {
   CollectUnexpandedParameterPacksVisitor(Unexpanded).TraverseTypeLoc(TL);
-}
+}
 
-void Sema::collectUnexpandedParameterPacks(CXXScopeSpec &SS,
-                   SmallVectorImpl<UnexpandedParameterPack> &Unexpanded) {
-  NestedNameSpecifier *Qualifier = SS.getScopeRep();
-  if (!Qualifier)
-    return;
-
-  NestedNameSpecifierLoc QualifierLoc(Qualifier, SS.location_data());
+void Sema::collectUnexpandedParameterPacks(
+    NestedNameSpecifierLoc NNS,
+    SmallVectorImpl<UnexpandedParameterPack> &Unexpanded) {
   CollectUnexpandedParameterPacksVisitor(Unexpanded)
-    .TraverseNestedNameSpecifierLoc(QualifierLoc);
+      .TraverseNestedNameSpecifierLoc(NNS);
 }
 
-void Sema::collectUnexpandedParameterPacks(const DeclarationNameInfo &NameInfo,
-                   SmallVectorImpl<UnexpandedParameterPack> &Unexpanded) {
+void Sema::collectUnexpandedParameterPacks(
+    const DeclarationNameInfo &NameInfo,
+    SmallVectorImpl<UnexpandedParameterPack> &Unexpanded) {
   CollectUnexpandedParameterPacksVisitor(Unexpanded)
     .TraverseDeclarationNameInfo(NameInfo);
 }
@@ -1018,6 +1016,11 @@ ExprResult Sema::ActOnCXXFoldExpr(SourceLocation LParenLoc, Expr *LHS,
   CheckFoldOperand(*this, LHS);
   CheckFoldOperand(*this, RHS);
 
+  auto DiscardOperands = [&] {
+    CorrectDelayedTyposInExpr(LHS);
+    CorrectDelayedTyposInExpr(RHS);
+  };
+
   // [expr.prim.fold]p3:
   //   In a binary fold, op1 and op2 shall be the same fold-operator, and
   //   either e1 shall contain an unexpanded parameter pack or e2 shall contain
@@ -1025,6 +1028,7 @@ ExprResult Sema::ActOnCXXFoldExpr(SourceLocation LParenLoc, Expr *LHS,
   if (LHS && RHS &&
       LHS->containsUnexpandedParameterPack() ==
           RHS->containsUnexpandedParameterPack()) {
+    DiscardOperands();
     return Diag(EllipsisLoc,
                 LHS->containsUnexpandedParameterPack()
                     ? diag::err_fold_expression_packs_both_sides
@@ -1038,6 +1042,7 @@ ExprResult Sema::ActOnCXXFoldExpr(SourceLocation LParenLoc, Expr *LHS,
   if (!LHS || !RHS) {
     Expr *Pack = LHS ? LHS : RHS;
     assert(Pack && "fold expression with neither LHS nor RHS");
+    DiscardOperands();
     if (!Pack->containsUnexpandedParameterPack())
       return Diag(EllipsisLoc, diag::err_pack_expansion_without_parameter_packs)
         << Pack->getSourceRange();
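The DiscardOperands lambda ensures delayed typo corrections in the operands are resolved before the fold-expression is rejected; the rules themselves are unchanged, e.g. (illustrative):

    template <typename... Ts>
    auto sum(Ts... ts) { return (ts + ... + 0); } // OK: pack on one side only
    // (1 + ... + 2)  // error: neither operand contains an unexpanded pack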
<< "cl_khr_int64_extended_atomics"; - declarator.setInvalidType(true); - } - if (!S.getOpenCLOptions().cl_khr_fp64 && - !TypeName.compare("atomic_double")) { - S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_requires_extension) - << Result << "cl_khr_fp64"; - declarator.setInvalidType(true); - } - } else if (!S.getOpenCLOptions().cl_khr_gl_msaa_sharing && - (Result->isOCLImage2dArrayMSAADepthROType() || - Result->isOCLImage2dArrayMSAADepthWOType() || - Result->isOCLImage2dArrayMSAADepthRWType() || - Result->isOCLImage2dArrayMSAAROType() || - Result->isOCLImage2dArrayMSAARWType() || - Result->isOCLImage2dArrayMSAAWOType() || - Result->isOCLImage2dMSAADepthROType() || - Result->isOCLImage2dMSAADepthRWType() || - Result->isOCLImage2dMSAADepthWOType() || - Result->isOCLImage2dMSAAROType() || - Result->isOCLImage2dMSAARWType() || - Result->isOCLImage2dMSAAWOType())) { - S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_requires_extension) - << Result << "cl_khr_gl_msaa_sharing"; - declarator.setInvalidType(true); - } } // TypeQuals handled by caller. @@ -1560,47 +1511,7 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) { break; case DeclSpec::TST_auto: - // TypeQuals handled by caller. - // If auto is mentioned in a lambda parameter context, convert it to a - // template parameter type immediately, with the appropriate depth and - // index, and update sema's state (LambdaScopeInfo) for the current lambda - // being analyzed (which tracks the invented type template parameter). - if (declarator.getContext() == Declarator::LambdaExprParameterContext) { - sema::LambdaScopeInfo *LSI = S.getCurLambda(); - assert(LSI && "No LambdaScopeInfo on the stack!"); - const unsigned TemplateParameterDepth = LSI->AutoTemplateParameterDepth; - const unsigned AutoParameterPosition = LSI->AutoTemplateParams.size(); - const bool IsParameterPack = declarator.hasEllipsis(); - - // Turns out we must create the TemplateTypeParmDecl here to - // retrieve the corresponding template parameter type. - TemplateTypeParmDecl *CorrespondingTemplateParam = - TemplateTypeParmDecl::Create(Context, - // Temporarily add to the TranslationUnit DeclContext. When the - // associated TemplateParameterList is attached to a template - // declaration (such as FunctionTemplateDecl), the DeclContext - // for each template parameter gets updated appropriately via - // a call to AdoptTemplateParameterList. - Context.getTranslationUnitDecl(), - /*KeyLoc*/ SourceLocation(), - /*NameLoc*/ declarator.getLocStart(), - TemplateParameterDepth, - AutoParameterPosition, // our template param index - /* Identifier*/ nullptr, false, IsParameterPack); - LSI->AutoTemplateParams.push_back(CorrespondingTemplateParam); - // Replace the 'auto' in the function parameter with this invented - // template type parameter. - Result = QualType(CorrespondingTemplateParam->getTypeForDecl(), 0); - } else { - // If auto appears in the declaration of a template parameter, treat - // the parameter as type-dependent. 
-      bool IsDependent =
-          S.getLangOpts().CPlusPlus1z &&
-          declarator.getContext() == Declarator::TemplateParamContext;
-      Result = Context.getAutoType(QualType(),
-                                   AutoTypeKeyword::Auto,
-                                   IsDependent);
-    }
+    Result = Context.getAutoType(QualType(), AutoTypeKeyword::Auto, false);
     break;
 
   case DeclSpec::TST_auto_type:
@@ -1645,6 +1556,10 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) {
     break;
   }
 
+  if (S.getLangOpts().OpenCL &&
+      S.checkOpenCLDisabledTypeDeclSpec(DS, Result))
+    declarator.setInvalidType(true);
+
   // Handle complex types.
   if (DS.getTypeSpecComplex() == DeclSpec::TSC_complex) {
     if (S.getLangOpts().Freestanding)
@@ -2827,6 +2742,12 @@ static QualType GetDeclSpecTypeForDeclarator(TypeProcessingState &state,
                                D.getDeclSpec().getAttributes().getList());
     break;
 
+  case UnqualifiedId::IK_DeductionGuideName:
+    // Deduction guides have a trailing return type and no type in their
+    // decl-specifier sequence. Use a placeholder return type for now.
+    T = SemaRef.Context.DependentTy;
+    break;
+
   case UnqualifiedId::IK_ConversionFunctionId:
     // The result type of a conversion function is the type that it
    // converts to.
@@ -2839,12 +2760,20 @@ static QualType GetDeclSpecTypeForDeclarator(TypeProcessingState &state,
     distributeTypeAttrsFromDeclarator(state, T);
 
   // C++11 [dcl.spec.auto]p5: reject 'auto' if it is not in an allowed context.
-  if (D.getDeclSpec().containsPlaceholderType()) {
+  if (DeducedType *Deduced = T->getContainedDeducedType()) {
+    AutoType *Auto = dyn_cast<AutoType>(Deduced);
     int Error = -1;
 
+    // Is this a 'auto' or 'decltype(auto)' type (as opposed to __auto_type or
+    // class template argument deduction)?
+    bool IsCXXAutoType =
+        (Auto && Auto->getKeyword() != AutoTypeKeyword::GNUAutoType);
+
     switch (D.getContext()) {
     case Declarator::LambdaExprContext:
-      llvm_unreachable("Can't specify a type specifier in lambda grammar");
+      // Declared return type of a lambda-declarator is implicit and is always
+      // 'auto'.
+      break;
    case Declarator::ObjCParameterContext:
    case Declarator::ObjCResultContext:
    case Declarator::PrototypeContext:
      break;
    case Declarator::LambdaExprParameterContext:
      // In C++14, generic lambdas allow 'auto' in their parameters.
-      if (!(SemaRef.getLangOpts().CPlusPlus14
-              && D.getDeclSpec().getTypeSpecType() == DeclSpec::TST_auto))
+      if (!SemaRef.getLangOpts().CPlusPlus14 ||
+          !Auto || Auto->getKeyword() != AutoTypeKeyword::Auto)
        Error = 16;
+      else {
+        // If auto is mentioned in a lambda parameter context, convert it to a
+        // template parameter type.
+        sema::LambdaScopeInfo *LSI = SemaRef.getCurLambda();
+        assert(LSI && "No LambdaScopeInfo on the stack!");
+        const unsigned TemplateParameterDepth = LSI->AutoTemplateParameterDepth;
+        const unsigned AutoParameterPosition = LSI->AutoTemplateParams.size();
+        const bool IsParameterPack = D.hasEllipsis();
+
+        // Create the TemplateTypeParmDecl here to retrieve the corresponding
+        // template parameter type. Template parameters are temporarily added
+        // to the TU until the associated TemplateDecl is created.
+        TemplateTypeParmDecl *CorrespondingTemplateParam =
+            TemplateTypeParmDecl::Create(
+                SemaRef.Context, SemaRef.Context.getTranslationUnitDecl(),
+                /*KeyLoc*/SourceLocation(), /*NameLoc*/D.getLocStart(),
+                TemplateParameterDepth, AutoParameterPosition,
+                /*Identifier*/nullptr, false, IsParameterPack);
+        LSI->AutoTemplateParams.push_back(CorrespondingTemplateParam);
+        // Replace the 'auto' in the function parameter with this invented
+        // template type parameter.
+        // FIXME: Retain some type sugar to indicate that this was written
+        // as 'auto'.
+        T = SemaRef.ReplaceAutoType(
+            T, QualType(CorrespondingTemplateParam->getTypeForDecl(), 0));
+      }
      break;
    case Declarator::MemberContext: {
      if (D.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_static ||
@@ -2868,6 +2823,8 @@ static QualType GetDeclSpecTypeForDeclarator(TypeProcessingState &state,
      case TTK_Class:  Error = 5; /* Class member */ break;
      case TTK_Interface: Error = 6; /* Interface member */ break;
      }
+      if (D.getDeclSpec().isFriendSpecified())
+        Error = 20; // Friend type
      break;
    }
    case Declarator::CXXCatchContext:
@@ -2875,8 +2832,10 @@ static QualType GetDeclSpecTypeForDeclarator(TypeProcessingState &state,
      Error = 7; // Exception declaration
      break;
    case Declarator::TemplateParamContext:
-      if (!SemaRef.getLangOpts().CPlusPlus1z)
-        Error = 8; // Template parameter
+      if (isa<DeducedTemplateSpecializationType>(Deduced))
+        Error = 19; // Template parameter
+      else if (!SemaRef.getLangOpts().CPlusPlus1z)
+        Error = 8; // Template parameter (until C++1z)
      break;
    case Declarator::BlockLiteralContext:
      Error = 9; // Block literal
@@ -2889,15 +2848,17 @@ static QualType GetDeclSpecTypeForDeclarator(TypeProcessingState &state,
      Error = 12; // Type alias
      break;
    case Declarator::TrailingReturnContext:
-      if (!SemaRef.getLangOpts().CPlusPlus14 ||
-          D.getDeclSpec().getTypeSpecType() == DeclSpec::TST_auto_type)
+      if (!SemaRef.getLangOpts().CPlusPlus14 || !IsCXXAutoType)
        Error = 13; // Function return type
      break;
    case Declarator::ConversionIdContext:
-      if (!SemaRef.getLangOpts().CPlusPlus14 ||
-          D.getDeclSpec().getTypeSpecType() == DeclSpec::TST_auto_type)
+      if (!SemaRef.getLangOpts().CPlusPlus14 || !IsCXXAutoType)
        Error = 14; // conversion-type-id
      break;
+    case Declarator::FunctionalCastContext:
+      if (isa<DeducedTemplateSpecializationType>(Deduced))
+        break;
+      LLVM_FALLTHROUGH;
    case Declarator::TypeNameContext:
      Error = 15; // Generic
      break;
@@ -2906,9 +2867,14 @@ static QualType GetDeclSpecTypeForDeclarator(TypeProcessingState &state,
    case Declarator::ForContext:
    case Declarator::InitStmtContext:
    case Declarator::ConditionContext:
+      // FIXME: P0091R3 (erroneously) does not permit class template argument
+      // deduction in conditions, for-init-statements, and other declarations
+      // that are not simple-declarations.
      break;
    case Declarator::CXXNewContext:
-      if (D.getDeclSpec().getTypeSpecType() == DeclSpec::TST_auto_type)
+      // FIXME: P0091R3 does not permit class template argument deduction here,
+      // but we follow GCC and allow it anyway.
+      if (!IsCXXAutoType && !isa<DeducedTemplateSpecializationType>(Deduced))
        Error = 17; // 'new' type
      break;
    case Declarator::KNRTypeListContext:
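The invented-template-parameter logic relocated above implements C++14 generic lambdas; conceptually (a sketch, not the patch's code):

    auto add = [](auto a, auto b) { return a + b; };
    // Each 'auto' becomes an invented template parameter of the call operator:
    // struct Closure {
    //   template <typename T0, typename T1>
    //   auto operator()(T0 a, T1 b) const { return a + b; }
    // };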
@@ -2922,8 +2888,7 @@ static QualType GetDeclSpecTypeForDeclarator(TypeProcessingState &state,
    // In Objective-C it is an error to use 'auto' on a function declarator
    // (and everywhere for '__auto_type').
    if (D.isFunctionDeclarator() &&
-        (!SemaRef.getLangOpts().CPlusPlus11 ||
-         D.getDeclSpec().getTypeSpecType() == DeclSpec::TST_auto_type))
+        (!SemaRef.getLangOpts().CPlusPlus11 || !IsCXXAutoType))
      Error = 13;
 
    bool HaveTrailing = false;
@@ -2933,21 +2898,11 @@ static QualType GetDeclSpecTypeForDeclarator(TypeProcessingState &state,
    // level. Check all declarator chunks (outermost first) anyway, to give
    // better diagnostics.
    // We don't support '__auto_type' with trailing return types.
-    if (SemaRef.getLangOpts().CPlusPlus11 &&
-        D.getDeclSpec().getTypeSpecType() != DeclSpec::TST_auto_type) {
-      for (unsigned i = 0, e = D.getNumTypeObjects(); i != e; ++i) {
-        unsigned chunkIndex = e - i - 1;
-        state.setCurrentChunkIndex(chunkIndex);
-        DeclaratorChunk &DeclType = D.getTypeObject(chunkIndex);
-        if (DeclType.Kind == DeclaratorChunk::Function) {
-          const DeclaratorChunk::FunctionTypeInfo &FTI = DeclType.Fun;
-          if (FTI.hasTrailingReturnType()) {
-            HaveTrailing = true;
-            Error = -1;
-            break;
-          }
-        }
-      }
+    // FIXME: Should we only do this for 'auto' and not 'decltype(auto)'?
+    if (SemaRef.getLangOpts().CPlusPlus11 && IsCXXAutoType &&
+        D.hasTrailingReturnType()) {
+      HaveTrailing = true;
+      Error = -1;
    }
 
    SourceRange AutoRange = D.getDeclSpec().getTypeSpecTypeLoc();
@@ -2955,15 +2910,28 @@
    if (D.getName().getKind() == UnqualifiedId::IK_ConversionFunctionId)
      AutoRange = D.getName().getSourceRange();
 
    if (Error != -1) {
-      unsigned Keyword;
-      switch (D.getDeclSpec().getTypeSpecType()) {
-      case DeclSpec::TST_auto: Keyword = 0; break;
-      case DeclSpec::TST_decltype_auto: Keyword = 1; break;
-      case DeclSpec::TST_auto_type: Keyword = 2; break;
-      default: llvm_unreachable("unknown auto TypeSpecType");
+      unsigned Kind;
+      if (Auto) {
+        switch (Auto->getKeyword()) {
+        case AutoTypeKeyword::Auto: Kind = 0; break;
+        case AutoTypeKeyword::DecltypeAuto: Kind = 1; break;
+        case AutoTypeKeyword::GNUAutoType: Kind = 2; break;
+        }
+      } else {
+        assert(isa<DeducedTemplateSpecializationType>(Deduced) &&
+               "unknown auto type");
+        Kind = 3;
      }
+
+      auto *DTST = dyn_cast<DeducedTemplateSpecializationType>(Deduced);
+      TemplateName TN = DTST ? DTST->getTemplateName() : TemplateName();
+
      SemaRef.Diag(AutoRange.getBegin(), diag::err_auto_not_allowed)
-        << Keyword << Error << AutoRange;
+        << Kind << Error << (int)SemaRef.getTemplateNameKindForDiagnostics(TN)
+        << QualType(Deduced, 0) << AutoRange;
+      if (auto *TD = TN.getAsTemplateDecl())
+        SemaRef.Diag(TD->getLocation(), diag::note_template_decl_here);
+
      T = SemaRef.Context.IntTy;
      D.setInvalidType(true);
    } else if (!HaveTrailing) {
@@ -3003,6 +2971,7 @@ static QualType GetDeclSpecTypeForDeclarator(TypeProcessingState &state,
      DiagID = diag::err_type_defined_in_alias_template;
      break;
    case Declarator::TypeNameContext:
+    case Declarator::FunctionalCastContext:
    case Declarator::ConversionIdContext:
    case Declarator::TemplateParamContext:
    case Declarator::CXXNewContext:
@@ -3215,7 +3184,7 @@ getCCForDeclaratorChunk(Sema &S, Declarator &D,
      if (Attr->getKind() == AttributeList::AT_OpenCLKernel) {
        llvm::Triple::ArchType arch = S.Context.getTargetInfo().getTriple().getArch();
        if (arch == llvm::Triple::spir || arch == llvm::Triple::spir64 ||
-            arch == llvm::Triple::amdgcn) {
+            arch == llvm::Triple::amdgcn || arch == llvm::Triple::r600) {
          CC = CC_OpenCLKernel;
        }
        break;
@@ -3495,6 +3464,68 @@ static FileID getNullabilityCompletenessCheckFileID(Sema &S,
  return file;
 }
 
+/// Creates a fix-it to insert a C-style nullability keyword at \p pointerLoc,
+/// taking into account whitespace before and after.
+static void fixItNullability(Sema &S, DiagnosticBuilder &Diag,
+                             SourceLocation PointerLoc,
+                             NullabilityKind Nullability) {
+  assert(PointerLoc.isValid());
+  if (PointerLoc.isMacroID())
+    return;
+
+  SourceLocation FixItLoc = S.getLocForEndOfToken(PointerLoc);
+  if (!FixItLoc.isValid() || FixItLoc == PointerLoc)
+    return;
+
+  const char *NextChar = S.SourceMgr.getCharacterData(FixItLoc);
+  if (!NextChar)
+    return;
+
+  SmallString<32> InsertionTextBuf{" "};
+  InsertionTextBuf += getNullabilitySpelling(Nullability);
+  InsertionTextBuf += " ";
+  StringRef InsertionText = InsertionTextBuf.str();
+
+  if (isWhitespace(*NextChar)) {
+    InsertionText = InsertionText.drop_back();
+  } else if (NextChar[-1] == '[') {
+    if (NextChar[0] == ']')
+      InsertionText = InsertionText.drop_back().drop_front();
+    else
+      InsertionText = InsertionText.drop_front();
+  } else if (!isIdentifierBody(NextChar[0], /*allow dollar*/true) &&
+             !isIdentifierBody(NextChar[-1], /*allow dollar*/true)) {
+    InsertionText = InsertionText.drop_back().drop_front();
+  }
+
+  Diag << FixItHint::CreateInsertion(FixItLoc, InsertionText);
+}
+
+static void emitNullabilityConsistencyWarning(Sema &S,
+                                              SimplePointerKind PointerKind,
+                                              SourceLocation PointerLoc) {
+  assert(PointerLoc.isValid());
+
+  if (PointerKind == SimplePointerKind::Array) {
+    S.Diag(PointerLoc, diag::warn_nullability_missing_array);
+  } else {
+    S.Diag(PointerLoc, diag::warn_nullability_missing)
+        << static_cast<unsigned>(PointerKind);
+  }
+
+  if (PointerLoc.isMacroID())
+    return;
+
+  auto addFixIt = [&](NullabilityKind Nullability) {
+    auto Diag = S.Diag(PointerLoc, diag::note_nullability_fix_it);
+    Diag << static_cast<unsigned>(Nullability);
+    Diag << static_cast<unsigned>(PointerKind);
+    fixItNullability(S, Diag, PointerLoc, Nullability);
+  };
+  addFixIt(NullabilityKind::Nullable);
+  addFixIt(NullabilityKind::NonNull);
+}
+
 /// Complains about missing nullability if the file containing \p pointerLoc
 /// has other uses of nullability (either the keywords or the \c assume_nonnull
 /// pragma).
@@ -3531,12 +3562,7 @@ static void checkNullabilityConsistency(Sema &S,
   }
 
   // Complain about missing nullability.
-  if (pointerKind == SimplePointerKind::Array) {
-    S.Diag(pointerLoc, diag::warn_nullability_missing_array);
-  } else {
-    S.Diag(pointerLoc, diag::warn_nullability_missing)
-        << static_cast<unsigned>(pointerKind);
-  }
+  emitNullabilityConsistencyWarning(S, pointerKind, pointerLoc);
 }
 
 /// Marks that a nullability feature has been used in the file containing
@@ -3561,13 +3587,8 @@ static void recordNullabilitySeen(Sema &S, SourceLocation loc) {
   if (fileNullability.PointerLoc.isInvalid())
     return;
 
-  if (fileNullability.PointerKind ==
-        static_cast<unsigned>(SimplePointerKind::Array)) {
-    S.Diag(fileNullability.PointerLoc, diag::warn_nullability_missing_array);
-  } else {
-    S.Diag(fileNullability.PointerLoc, diag::warn_nullability_missing)
-        << static_cast<unsigned>(fileNullability.PointerKind);
-  }
+  auto kind = static_cast<SimplePointerKind>(fileNullability.PointerKind);
+  emitNullabilityConsistencyWarning(S, kind, fileNullability.PointerLoc);
 }
 
 /// Returns true if any of the declarator chunks before \p endIndex include a
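The refactored warning and fix-it pair behave like this (Clang nullability extension; output paraphrased):

    int *_Nonnull p; // any annotation in the file enables consistency checking
    int *q;          // warning: pointer is missing a nullability type specifier
                     // note: insert '_Nullable' if the pointer may be null
                     // note: insert '_Nonnull' if the pointer should never be null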
@@ -3632,17 +3653,32 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
 
   // If T is 'decltype(auto)', the only declarators we can have are parens
   // and at most one function declarator if this is a function declaration.
-  if (const AutoType *AT = T->getAs<AutoType>()) {
-    if (AT->isDecltypeAuto()) {
+  // If T is a deduced class template specialization type, we can have no
+  // declarator chunks at all.
+  if (auto *DT = T->getAs<DeducedType>()) {
+    const AutoType *AT = T->getAs<AutoType>();
+    bool IsClassTemplateDeduction = isa<DeducedTemplateSpecializationType>(DT);
+    if ((AT && AT->isDecltypeAuto()) || IsClassTemplateDeduction) {
       for (unsigned I = 0, E = D.getNumTypeObjects(); I != E; ++I) {
         unsigned Index = E - I - 1;
         DeclaratorChunk &DeclChunk = D.getTypeObject(Index);
-        unsigned DiagId = diag::err_decltype_auto_compound_type;
+        unsigned DiagId = IsClassTemplateDeduction
+                              ? diag::err_deduced_class_template_compound_type
+                              : diag::err_decltype_auto_compound_type;
         unsigned DiagKind = 0;
         switch (DeclChunk.Kind) {
         case DeclaratorChunk::Paren:
+          // FIXME: Rejecting this is a little silly.
+          if (IsClassTemplateDeduction) {
+            DiagKind = 4;
+            break;
+          }
           continue;
         case DeclaratorChunk::Function: {
+          if (IsClassTemplateDeduction) {
+            DiagKind = 3;
+            break;
+          }
           unsigned FnIndex;
           if (D.isFunctionDeclarationContext() &&
               D.isFunctionDeclarator(FnIndex) && FnIndex == Index)
@@ -3843,6 +3879,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
    case Declarator::TemplateParamContext:
    case Declarator::TemplateTypeArgContext:
    case Declarator::TypeNameContext:
+    case Declarator::FunctionalCastContext:
      // Don't infer in these contexts.
      break;
    }
@@ -3901,20 +3938,10 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
    if (pointerLoc.isValid() &&
        complainAboutInferringWithinChunk !=
          PointerWrappingDeclaratorKind::None) {
-      SourceLocation fixItLoc = S.getLocForEndOfToken(pointerLoc);
-      StringRef insertionText = " _Nonnull ";
-      if (const char *nextChar = S.SourceMgr.getCharacterData(fixItLoc)) {
-        if (isWhitespace(*nextChar)) {
-          insertionText = insertionText.drop_back();
-        } else if (!isIdentifierBody(nextChar[0], /*allow dollar*/true) &&
-                   !isIdentifierBody(nextChar[-1], /*allow dollar*/true)) {
-          insertionText = insertionText.drop_back().drop_front();
-        }
-      }
-
-      S.Diag(pointerLoc, diag::warn_nullability_inferred_on_nested_type)
-        << static_cast<unsigned>(complainAboutInferringWithinChunk)
-        << FixItHint::CreateInsertion(fixItLoc, insertionText);
+      auto Diag =
+          S.Diag(pointerLoc, diag::warn_nullability_inferred_on_nested_type);
+      Diag << static_cast<int>(complainAboutInferringWithinChunk);
+      fixItNullability(S, Diag, pointerLoc, NullabilityKind::NonNull);
    }
 
    if (inferNullabilityInnerOnly)
@@ -3941,7 +3968,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
 
  // If the type itself could have nullability but does not, infer pointer
  // nullability and perform consistency checking.
-  if (S.ActiveTemplateInstantiations.empty()) {
+  if (S.CodeSynthesisContexts.empty()) {
    if (T->canHaveNullability() && !T->getNullability(S.Context)) {
      if (isVaList(T)) {
        // Record that we've seen a pointer, but do nothing else.
@@ -4142,7 +4169,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
      if (!D.isInvalidType()) {
        // trailing-return-type is only required if we're declaring a function,
        // and not, for instance, a pointer to a function.
-        if (D.getDeclSpec().containsPlaceholderType() &&
+        if (D.getDeclSpec().hasAutoTypeSpec() &&
            !FTI.hasTrailingReturnType() && chunkIndex == 0 &&
            !S.getLangOpts().CPlusPlus14) {
          S.Diag(D.getDeclSpec().getTypeSpecTypeLoc(),
@@ -4154,16 +4181,25 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
        } else if (FTI.hasTrailingReturnType()) {
          // T must be exactly 'auto' at this point. See CWG issue 681.
          if (isa<ParenType>(T)) {
-            S.Diag(D.getDeclSpec().getTypeSpecTypeLoc(),
+            S.Diag(D.getLocStart(),
                 diag::err_trailing_return_in_parens)
-              << T << D.getDeclSpec().getSourceRange();
+              << T << D.getSourceRange();
            D.setInvalidType(true);
+          } else if (D.getName().getKind() ==
+                         UnqualifiedId::IK_DeductionGuideName) {
+            if (T != Context.DependentTy) {
+              S.Diag(D.getDeclSpec().getLocStart(),
+                     diag::err_deduction_guide_with_complex_decl)
+                  << D.getSourceRange();
+              D.setInvalidType(true);
+            }
          } else if (D.getContext() != Declarator::LambdaExprContext &&
                     (T.hasQualifiers() || !isa<AutoType>(T) ||
-                      cast<AutoType>(T)->getKeyword() != AutoTypeKeyword::Auto)) {
+                      cast<AutoType>(T)->getKeyword() !=
+                          AutoTypeKeyword::Auto)) {
            S.Diag(D.getDeclSpec().getTypeSpecTypeLoc(),
-                 diag::err_trailing_return_without_auto)
-              << T << D.getDeclSpec().getSourceRange();
+                   diag::err_trailing_return_without_auto)
+                << T << D.getDeclSpec().getSourceRange();
            D.setInvalidType(true);
          }
          T = S.GetTypeFromParser(FTI.getTrailingReturnType(), &TInfo);
@@ -4177,7 +4213,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
 
      // C99 6.7.5.3p1: The return type may not be a function or array type.
      // For conversion functions, we'll diagnose this particular error later.
-      if ((T->isArrayType() || T->isFunctionType()) &&
+      if (!D.isInvalidType() && (T->isArrayType() || T->isFunctionType()) &&
          (D.getName().getKind() != UnqualifiedId::IK_ConversionFunctionId)) {
        unsigned diagID = diag::err_func_returning_array_function;
        // Last processing chunk in block context means this function chunk
@@ -4194,7 +4230,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
      // FIXME: This really should be in BuildFunctionType.
      if (T->isHalfType()) {
        if (S.getLangOpts().OpenCL) {
-          if (!S.getOpenCLOptions().cl_khr_fp16) {
+          if (!S.getOpenCLOptions().isEnabled("cl_khr_fp16")) {
            S.Diag(D.getIdentifierLoc(), diag::err_opencl_invalid_return)
                << T << 0 /*pointer hint*/;
            D.setInvalidType(true);
@@ -4427,7 +4463,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
          // Disallow half FP parameters.
          // FIXME: This really should be in BuildFunctionType.
          if (S.getLangOpts().OpenCL) {
-            if (!S.getOpenCLOptions().cl_khr_fp16) {
+            if (!S.getOpenCLOptions().isEnabled("cl_khr_fp16")) {
              S.Diag(Param->getLocation(), diag::err_opencl_half_param)
                  << ParamTy;
              D.setInvalidType();
@@ -4461,6 +4497,11 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
            HasAnyInterestingExtParameterInfos = true;
          }
 
+          if (Param->hasAttr<PassObjectSizeAttr>()) {
+            ExtParameterInfos[i] = ExtParameterInfos[i].withHasPassObjectSize();
+            HasAnyInterestingExtParameterInfos = true;
+          }
+
          ParamTys.push_back(ParamTy);
        }
 
@@ -4593,14 +4634,18 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
    //
    // Core issue 547 also allows cv-qualifiers on function types that are
    // top-level template type arguments.
- bool FreeFunction; - if (!D.getCXXScopeSpec().isSet()) { - FreeFunction = ((D.getContext() != Declarator::MemberContext && - D.getContext() != Declarator::LambdaExprContext) || - D.getDeclSpec().isFriendSpecified()); + enum { NonMember, Member, DeductionGuide } Kind = NonMember; + if (D.getName().getKind() == UnqualifiedId::IK_DeductionGuideName) + Kind = DeductionGuide; + else if (!D.getCXXScopeSpec().isSet()) { + if ((D.getContext() == Declarator::MemberContext || + D.getContext() == Declarator::LambdaExprContext) && + !D.getDeclSpec().isFriendSpecified()) + Kind = Member; } else { DeclContext *DC = S.computeDeclContext(D.getCXXScopeSpec()); - FreeFunction = (DC && !DC->isRecord()); + if (!DC || DC->isRecord()) + Kind = Member; } // C++11 [dcl.fct]p6 (w/DR1417): @@ -4620,7 +4665,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state, // // ... for instance. if (IsQualifiedFunction && - !(!FreeFunction && + !(Kind == Member && D.getDeclSpec().getStorageClassSpec() != DeclSpec::SCS_static) && !IsTypedefName && D.getContext() != Declarator::TemplateTypeArgContext) { @@ -4648,7 +4693,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state, } S.Diag(Loc, diag::err_invalid_qualified_function_type) - << FreeFunction << D.isFunctionDeclarator() << T + << Kind << D.isFunctionDeclarator() << T << getFunctionQualifiersAsString(FnTy) << FixItHint::CreateRemoval(RemovalRange); @@ -4732,6 +4777,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state, case Declarator::ObjCParameterContext: // FIXME: special diagnostic here? case Declarator::ObjCResultContext: // FIXME: special diagnostic here? case Declarator::TypeNameContext: + case Declarator::FunctionalCastContext: case Declarator::CXXNewContext: case Declarator::AliasDeclContext: case Declarator::AliasTemplateContext: @@ -5282,7 +5328,7 @@ namespace { ParmVarDecl *Param = cast(FTI.Params[i].Param); TL.setParam(tpi++, Param); } - // FIXME: exception specs + TL.setExceptionSpecRange(FTI.getExceptionSpecRange()); } void VisitParenTypeLoc(ParenTypeLoc TL) { assert(Chunk.Kind == DeclaratorChunk::Paren); @@ -6912,8 +6958,10 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type, (TAL == TAL_DeclSpec || TAL == TAL_DeclChunk)) { Declarator &D = state.getDeclarator(); if (state.getCurrentChunkIndex() > 0 && - D.getTypeObject(state.getCurrentChunkIndex() - 1).Kind == - DeclaratorChunk::Pointer) { + (D.getTypeObject(state.getCurrentChunkIndex() - 1).Kind == + DeclaratorChunk::Pointer || + D.getTypeObject(state.getCurrentChunkIndex() - 1).Kind == + DeclaratorChunk::BlockPointer)) { type = state.getSema().Context.getAddrSpaceQualType( type, LangAS::opencl_generic); } else if (state.getCurrentChunkIndex() == 0 && @@ -7529,7 +7577,7 @@ QualType Sema::BuildDecltypeType(Expr *E, SourceLocation Loc, if (ER.isInvalid()) return QualType(); E = ER.get(); - if (AsUnevaluated && ActiveTemplateInstantiations.empty() && + if (AsUnevaluated && CodeSynthesisContexts.empty() && E->HasSideEffects(Context, false)) { // The expression operand for decltype is in an unevaluated expression // context, so side effects could result in unintended consequences. 
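
The NonMember/Member/DeductionGuide kind introduced above feeds diag::err_invalid_qualified_function_type, which enforces C++11 [dcl.fct]p6: a cv-qualifier-seq on a function type is only valid when the type is that of a non-static member function (or, per core issue 547, a top-level template type argument). As a quick hand-written illustration of what that rule accepts and rejects (not taken from the patch):

    // Sketch of the [dcl.fct]p6 rule the diagnostic above enforces.
    struct S {
      void observe() const;      // OK: 'const' qualifies a non-static member
      static void sf();          // OK: static member, no qualifier involved
    };

    using ConstFn = void() const;  // a cv-qualified function type is legal...
    using MemPtr = ConstFn S::*;   // ...as the pointee of a pointer-to-member

    // ConstFn *fp;                // error: any other use of a cv-qualified
                                   // function type is ill-formed

    int main() { return 0; }
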
diff --git a/tools/clang/lib/Sema/TreeTransform.h b/tools/clang/lib/Sema/TreeTransform.h index 15b722b..510c26d 100644 --- a/tools/clang/lib/Sema/TreeTransform.h +++ b/tools/clang/lib/Sema/TreeTransform.h @@ -307,6 +307,17 @@ class TreeTransform { /// QualType TransformType(TypeLocBuilder &TLB, TypeLoc TL); + /// \brief Transform a type that is permitted to produce a + /// DeducedTemplateSpecializationType. + /// + /// This is used in the (relatively rare) contexts where it is acceptable + /// for transformation to produce a class template type with deduced + /// template arguments. + /// @{ + QualType TransformTypeWithDeducedTST(QualType T); + TypeSourceInfo *TransformTypeWithDeducedTST(TypeSourceInfo *DI); + /// @} + /// \brief Transform the given statement. /// /// By default, this routine transforms a statement by delegating to the @@ -457,6 +468,10 @@ class TreeTransform { return cast_or_null(getDerived().TransformDecl(Loc, D)); } + /// Transform the set of declarations in an OverloadExpr. + bool TransformOverloadExprDecls(OverloadExpr *Old, bool RequiresADL, + LookupResult &R); + /// \brief Transform the given nested-name-specifier with source-location /// information. /// @@ -501,7 +516,8 @@ class TreeTransform { TransformTemplateName(CXXScopeSpec &SS, TemplateName Name, SourceLocation NameLoc, QualType ObjectType = QualType(), - NamedDecl *FirstQualifierInScope = nullptr); + NamedDecl *FirstQualifierInScope = nullptr, + bool AllowInjectedClassName = false); /// \brief Transform the given template argument. /// @@ -667,6 +683,16 @@ class TreeTransform { OMPClause *Transform ## Class(Class *S); #include "clang/Basic/OpenMPKinds.def" + /// \brief Build a new qualified type given its unqualified type and type + /// qualifiers. + /// + /// By default, this routine adds type qualifiers only to types that can + /// have qualifiers, and silently suppresses those qualifiers that are not + /// permitted. Subclasses may override this routine to provide different + /// behavior. + QualType RebuildQualifiedType(QualType T, SourceLocation Loc, + Qualifiers Quals); + /// \brief Build a new pointer type given its pointee type. /// /// By default, performs semantic analysis when building the pointer type. @@ -821,7 +847,7 @@ class TreeTransform { /// \brief Rebuild an unresolved typename type, given the decl that /// the UnresolvedUsingTypenameDecl was transformed to. - QualType RebuildUnresolvedUsingType(Decl *D); + QualType RebuildUnresolvedUsingType(SourceLocation NameLoc, Decl *D); /// \brief Build a new typedef type. QualType RebuildTypedefType(TypedefNameDecl *Typedef) { @@ -871,6 +897,14 @@ class TreeTransform { /*IsDependent*/ false); } + /// By default, builds a new DeducedTemplateSpecializationType with the given + /// deduced type. + QualType RebuildDeducedTemplateSpecializationType(TemplateName Template, + QualType Deduced) { + return SemaRef.Context.getDeducedTemplateSpecializationType( + Template, Deduced, /*IsDependent*/ false); + } + /// \brief Build a new template specialization type. /// /// By default, performs semantic analysis when building the template @@ -885,7 +919,7 @@ class TreeTransform { /// By default, builds a new ParenType type from the inner type. /// Subclasses may override this routine to provide different behavior. QualType RebuildParenType(QualType InnerType) { - return SemaRef.Context.getParenType(InnerType); + return SemaRef.BuildParenType(InnerType); } /// \brief Build a new qualified name type. 
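
Several of the hooks declared above (TransformTypeWithDeducedTST, RebuildDeducedTemplateSpecializationType) exist to carry the C++17 class template argument deduction placeholder through instantiation. In source terms, a DeducedTemplateSpecializationType is the type of a declaration like the following (illustrative only; Box is a made-up template):

    #include <utility>

    template <typename T> struct Box {
      T value;
      explicit Box(T v) : value(v) {}
    };

    int main() {
      Box b(42);            // 'Box' is written without arguments: a
                            // DeducedTemplateSpecializationType until class
                            // template argument deduction picks Box<int>
      std::pair p(1, 2.0);  // same mechanism for a library template
      return b.value - 42 + static_cast<int>(p.first) - 1;
    }
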
@@ -912,14 +946,15 @@ class TreeTransform { NestedNameSpecifierLoc QualifierLoc, const IdentifierInfo *Name, SourceLocation NameLoc, - TemplateArgumentListInfo &Args) { + TemplateArgumentListInfo &Args, + bool AllowInjectedClassName) { // Rebuild the template name. // TODO: avoid TemplateName abstraction CXXScopeSpec SS; SS.Adopt(QualifierLoc); TemplateName InstName = getDerived().RebuildTemplateName(SS, *Name, NameLoc, QualType(), - nullptr); + nullptr, AllowInjectedClassName); if (InstName.isNull()) return QualType(); @@ -954,7 +989,8 @@ class TreeTransform { SourceLocation KeywordLoc, NestedNameSpecifierLoc QualifierLoc, const IdentifierInfo *Id, - SourceLocation IdLoc) { + SourceLocation IdLoc, + bool DeducedTSTContext) { CXXScopeSpec SS; SS.Adopt(QualifierLoc); @@ -966,9 +1002,25 @@ class TreeTransform { Id); } - if (Keyword == ETK_None || Keyword == ETK_Typename) - return SemaRef.CheckTypenameType(Keyword, KeywordLoc, QualifierLoc, - *Id, IdLoc); + if (Keyword == ETK_None || Keyword == ETK_Typename) { + QualType T = SemaRef.CheckTypenameType(Keyword, KeywordLoc, QualifierLoc, + *Id, IdLoc); + // If a dependent name resolves to a deduced template specialization type, + // check that we're in one of the syntactic contexts permitting it. + if (!DeducedTSTContext) { + if (auto *Deduced = dyn_cast_or_null( + T.isNull() ? nullptr : T->getContainedDeducedType())) { + SemaRef.Diag(IdLoc, diag::err_dependent_deduced_tst) + << (int)SemaRef.getTemplateNameKindForDiagnostics( + Deduced->getTemplateName()) + << QualType(QualifierLoc.getNestedNameSpecifier()->getAsType(), 0); + if (auto *TD = Deduced->getTemplateName().getAsTemplateDecl()) + SemaRef.Diag(TD->getLocation(), diag::note_template_decl_here); + return QualType(); + } + } + return T; + } TagTypeKind Kind = TypeWithKeyword::getTagTypeKindForKeyword(Keyword); @@ -1084,7 +1136,8 @@ class TreeTransform { const IdentifierInfo &Name, SourceLocation NameLoc, QualType ObjectType, - NamedDecl *FirstQualifierInScope); + NamedDecl *FirstQualifierInScope, + bool AllowInjectedClassName); /// \brief Build a new template name given a nested name specifier and the /// overloaded operator name that is referred to as a template. @@ -1096,7 +1149,8 @@ class TreeTransform { TemplateName RebuildTemplateName(CXXScopeSpec &SS, OverloadedOperatorKind Operator, SourceLocation NameLoc, - QualType ObjectType); + QualType ObjectType, + bool AllowInjectedClassName); /// \brief Build a new template name given a template template parameter pack /// and the @@ -1308,16 +1362,28 @@ class TreeTransform { /// /// By default, performs semantic analysis to build the new statement. /// Subclasses may override this routine to provide different behavior. - StmtResult RebuildCoreturnStmt(SourceLocation CoreturnLoc, Expr *Result) { - return getSema().BuildCoreturnStmt(CoreturnLoc, Result); + StmtResult RebuildCoreturnStmt(SourceLocation CoreturnLoc, Expr *Result, + bool IsImplicit) { + return getSema().BuildCoreturnStmt(CoreturnLoc, Result, IsImplicit); } /// \brief Build a new co_await expression. /// /// By default, performs semantic analysis to build the new expression. /// Subclasses may override this routine to provide different behavior. 
- ExprResult RebuildCoawaitExpr(SourceLocation CoawaitLoc, Expr *Result) { - return getSema().BuildCoawaitExpr(CoawaitLoc, Result); + ExprResult RebuildCoawaitExpr(SourceLocation CoawaitLoc, Expr *Result, + bool IsImplicit) { + return getSema().BuildResolvedCoawaitExpr(CoawaitLoc, Result, IsImplicit); + } + + /// \brief Build a new co_await expression. + /// + /// By default, performs semantic analysis to build the new expression. + /// Subclasses may override this routine to provide different behavior. + ExprResult RebuildDependentCoawaitExpr(SourceLocation CoawaitLoc, + Expr *Result, + UnresolvedLookupExpr *Lookup) { + return getSema().BuildUnresolvedCoawaitExpr(CoawaitLoc, Result, Lookup); } /// \brief Build a new co_yield expression. @@ -1328,6 +1394,10 @@ class TreeTransform { return getSema().BuildCoyieldExpr(CoyieldLoc, Result); } + StmtResult RebuildCoroutineBodyStmt(CoroutineBodyStmt::CtorArgs Args) { + return getSema().BuildCoroutineBodyStmt(Args); + } + /// \brief Build a new Objective-C \@try statement. /// /// By default, performs semantic analysis to build the new statement. @@ -2928,16 +2998,17 @@ class TreeTransform { ExprResult RebuildObjCIvarRefExpr(Expr *BaseArg, ObjCIvarDecl *Ivar, SourceLocation IvarLoc, bool IsArrow, bool IsFreeIvar) { - // FIXME: We lose track of the IsFreeIvar bit. CXXScopeSpec SS; DeclarationNameInfo NameInfo(Ivar->getDeclName(), IvarLoc); - return getSema().BuildMemberReferenceExpr(BaseArg, BaseArg->getType(), - /*FIXME:*/IvarLoc, IsArrow, - SS, SourceLocation(), - /*FirstQualifierInScope=*/nullptr, - NameInfo, - /*TemplateArgs=*/nullptr, - /*S=*/nullptr); + ExprResult Result = getSema().BuildMemberReferenceExpr( + BaseArg, BaseArg->getType(), + /*FIXME:*/ IvarLoc, IsArrow, SS, SourceLocation(), + /*FirstQualifierInScope=*/nullptr, NameInfo, + /*TemplateArgs=*/nullptr, + /*S=*/nullptr); + if (IsFreeIvar && Result.isUsable()) + cast(Result.get())->setIsFreeIvar(IsFreeIvar); + return Result; } /// \brief Build a new Objective-C property reference expression. 
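
The split between RebuildCoawaitExpr and RebuildDependentCoawaitExpr mirrors the two lookup situations for co_await: when the operand is type-dependent, the applicable operator co_await cannot be resolved until instantiation, so the unresolved lookup is carried along instead. A hedged sketch of the source pattern involved, written against C++20's <coroutine> (the patch itself targets the coroutines TS; Task is a made-up return type):

    #include <coroutine>

    struct Task {
      struct promise_type {                       // the promise the transform
        Task get_return_object() { return {}; }   // rebuilds before anything else
        std::suspend_never initial_suspend() { return {}; }
        std::suspend_never final_suspend() noexcept { return {}; }
        void return_void() {}
        void unhandled_exception() {}
      };
    };

    template <typename Awaitable>
    Task await_it(Awaitable a) {
      // Dependent operand: which operator co_await (if any) applies is
      // unknown until 'Awaitable' is substituted, hence the dependent path.
      co_await a;
    }

    int main() {
      await_it(std::suspend_never{});  // instantiation resolves the awaiter
      return 0;
    }
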
@@ -3141,6 +3212,10 @@ class TreeTransform { TypeSourceInfo *TransformTSIInObjectScope(TypeLoc TL, QualType ObjectType, NamedDecl *FirstQualifierInScope, CXXScopeSpec &SS); + + QualType TransformDependentNameType(TypeLocBuilder &TLB, + DependentNameTypeLoc TL, + bool DeducibleTSTContext); }; template @@ -3558,6 +3633,19 @@ ::TransformDeclarationNameInfo(const DeclarationNameInfo &NameInfo) { case DeclarationName::CXXUsingDirective: return NameInfo; + case DeclarationName::CXXDeductionGuideName: { + TemplateDecl *OldTemplate = Name.getCXXDeductionGuideTemplate(); + TemplateDecl *NewTemplate = cast_or_null( + getDerived().TransformDecl(NameInfo.getLoc(), OldTemplate)); + if (!NewTemplate) + return DeclarationNameInfo(); + + DeclarationNameInfo NewNameInfo(NameInfo); + NewNameInfo.setName( + SemaRef.Context.DeclarationNames.getCXXDeductionGuideName(NewTemplate)); + return NewNameInfo; + } + case DeclarationName::CXXConstructorName: case DeclarationName::CXXDestructorName: case DeclarationName::CXXConversionFunctionName: { @@ -3597,7 +3685,8 @@ TreeTransform::TransformTemplateName(CXXScopeSpec &SS, TemplateName Name, SourceLocation NameLoc, QualType ObjectType, - NamedDecl *FirstQualifierInScope) { + NamedDecl *FirstQualifierInScope, + bool AllowInjectedClassName) { if (QualifiedTemplateName *QTN = Name.getAsQualifiedTemplateName()) { TemplateDecl *Template = QTN->getTemplateDecl(); assert(Template && "qualified template name must refer to a template"); @@ -3634,11 +3723,12 @@ TreeTransform::TransformTemplateName(CXXScopeSpec &SS, *DTN->getIdentifier(), NameLoc, ObjectType, - FirstQualifierInScope); + FirstQualifierInScope, + AllowInjectedClassName); } return getDerived().RebuildTemplateName(SS, DTN->getOperator(), NameLoc, - ObjectType); + ObjectType, AllowInjectedClassName); } if (TemplateDecl *Template = Name.getAsTemplateDecl()) { @@ -4030,11 +4120,57 @@ TreeTransform::TransformType(TypeLocBuilder &TLB, TypeLoc T) { llvm_unreachable("unhandled type loc!"); } -/// FIXME: By default, this routine adds type qualifiers only to types -/// that can have qualifiers, and silently suppresses those qualifiers -/// that are not permitted (e.g., qualifiers on reference or function -/// types). This is the right thing for template instantiation, but -/// probably not for other clients. +template +QualType TreeTransform::TransformTypeWithDeducedTST(QualType T) { + if (!isa(T)) + return TransformType(T); + + if (getDerived().AlreadyTransformed(T)) + return T; + TypeSourceInfo *DI = getSema().Context.getTrivialTypeSourceInfo(T, + getDerived().getBaseLocation()); + TypeSourceInfo *NewDI = getDerived().TransformTypeWithDeducedTST(DI); + return NewDI ? NewDI->getType() : QualType(); +} + +template +TypeSourceInfo * +TreeTransform::TransformTypeWithDeducedTST(TypeSourceInfo *DI) { + if (!isa(DI->getType())) + return TransformType(DI); + + // Refine the base location to the type's location. 
+ TemporaryBase Rebase(*this, DI->getTypeLoc().getBeginLoc(), + getDerived().getBaseEntity()); + if (getDerived().AlreadyTransformed(DI->getType())) + return DI; + + TypeLocBuilder TLB; + + TypeLoc TL = DI->getTypeLoc(); + TLB.reserve(TL.getFullDataSize()); + + Qualifiers Quals; + auto QTL = TL.getAs(); + if (QTL) + TL = QTL.getUnqualifiedLoc(); + + auto DNTL = TL.castAs(); + + QualType Result = getDerived().TransformDependentNameType( + TLB, DNTL, /*DeducedTSTContext*/true); + if (Result.isNull()) + return nullptr; + + if (QTL) { + Result = getDerived().RebuildQualifiedType( + Result, QTL.getBeginLoc(), QTL.getType().getLocalQualifiers()); + TLB.TypeWasModifiedSafely(Result); + } + + return TLB.getTypeSourceInfo(SemaRef.Context, Result); +} + template QualType TreeTransform::TransformQualifiedType(TypeLocBuilder &TLB, @@ -4045,64 +4181,71 @@ TreeTransform::TransformQualifiedType(TypeLocBuilder &TLB, if (Result.isNull()) return QualType(); - // Silently suppress qualifiers if the result type can't be qualified. - // FIXME: this is the right thing for template instantiation, but - // probably not for other clients. - if (Result->isFunctionType() || Result->isReferenceType()) - return Result; + Result = getDerived().RebuildQualifiedType(Result, T.getBeginLoc(), Quals); + + // RebuildQualifiedType might have updated the type, but not in a way + // that invalidates the TypeLoc. (There's no location information for + // qualifiers.) + TLB.TypeWasModifiedSafely(Result); + + return Result; +} + +template +QualType TreeTransform::RebuildQualifiedType(QualType T, + SourceLocation Loc, + Qualifiers Quals) { + // C++ [dcl.fct]p7: + // [When] adding cv-qualifications on top of the function type [...] the + // cv-qualifiers are ignored. + // C++ [dcl.ref]p1: + // when the cv-qualifiers are introduced through the use of a typedef-name + // or decltype-specifier [...] the cv-qualifiers are ignored. + // Note that [dcl.ref]p1 lists all cases in which cv-qualifiers can be + // applied to a reference type. + // FIXME: This removes all qualifiers, not just cv-qualifiers! + if (T->isFunctionType() || T->isReferenceType()) + return T; // Suppress Objective-C lifetime qualifiers if they don't make sense for the // resulting type. if (Quals.hasObjCLifetime()) { - if (!Result->isObjCLifetimeType() && !Result->isDependentType()) + if (!T->isObjCLifetimeType() && !T->isDependentType()) Quals.removeObjCLifetime(); - else if (Result.getObjCLifetime()) { + else if (T.getObjCLifetime()) { // Objective-C ARC: // A lifetime qualifier applied to a substituted template parameter // overrides the lifetime qualifier from the template argument. const AutoType *AutoTy; if (const SubstTemplateTypeParmType *SubstTypeParam - = dyn_cast(Result)) { + = dyn_cast(T)) { QualType Replacement = SubstTypeParam->getReplacementType(); Qualifiers Qs = Replacement.getQualifiers(); Qs.removeObjCLifetime(); - Replacement - = SemaRef.Context.getQualifiedType(Replacement.getUnqualifiedType(), - Qs); - Result = SemaRef.Context.getSubstTemplateTypeParmType( - SubstTypeParam->getReplacedParameter(), - Replacement); - TLB.TypeWasModifiedSafely(Result); - } else if ((AutoTy = dyn_cast(Result)) && AutoTy->isDeduced()) { + Replacement = SemaRef.Context.getQualifiedType( + Replacement.getUnqualifiedType(), Qs); + T = SemaRef.Context.getSubstTemplateTypeParmType( + SubstTypeParam->getReplacedParameter(), Replacement); + } else if ((AutoTy = dyn_cast(T)) && AutoTy->isDeduced()) { // 'auto' types behave the same way as template parameters. 
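
The two standard rules quoted in RebuildQualifiedType are directly observable: cv-qualifiers layered onto a function or reference type through a typedef-name are ignored rather than applied, which is why the rebuild returns such types unchanged. A small self-contained check (assumption: any C++11 compiler):

    #include <type_traits>

    using Fn  = void();   // a function type
    using Ref = int&;     // a reference type

    // Per [dcl.fct]p7 and [dcl.ref]p1, 'const' added through the typedef-name
    // is silently dropped, so both pairs compare as the same type.
    static_assert(std::is_same<const Fn, Fn>::value,
                  "cv-qualifier on a function type is ignored");
    static_assert(std::is_same<const Ref, Ref>::value,
                  "cv-qualifier on a reference type is ignored");

    int main() { return 0; }
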
QualType Deduced = AutoTy->getDeducedType(); Qualifiers Qs = Deduced.getQualifiers(); Qs.removeObjCLifetime(); - Deduced = SemaRef.Context.getQualifiedType(Deduced.getUnqualifiedType(), - Qs); - Result = SemaRef.Context.getAutoType(Deduced, AutoTy->getKeyword(), - AutoTy->isDependentType()); - TLB.TypeWasModifiedSafely(Result); + Deduced = + SemaRef.Context.getQualifiedType(Deduced.getUnqualifiedType(), Qs); + T = SemaRef.Context.getAutoType(Deduced, AutoTy->getKeyword(), + AutoTy->isDependentType()); } else { // Otherwise, complain about the addition of a qualifier to an // already-qualified type. - SourceRange R = T.getUnqualifiedLoc().getSourceRange(); - SemaRef.Diag(R.getBegin(), diag::err_attr_objc_ownership_redundant) - << Result << R; - + // FIXME: Why is this check not in Sema::BuildQualifiedType? + SemaRef.Diag(Loc, diag::err_attr_objc_ownership_redundant) << T; Quals.removeObjCLifetime(); } } } - if (!Quals.empty()) { - Result = SemaRef.BuildQualifiedType(Result, T.getBeginLoc(), Quals); - // BuildQualifiedType might not add qualifiers if they are invalid. - if (Result.hasLocalQualifiers()) - TLB.push(Result); - // No location information to preserve. - } - return Result; + return SemaRef.BuildQualifiedType(T, Loc, Quals); } template @@ -4148,11 +4291,9 @@ TypeSourceInfo *TreeTransform::TransformTSIInObjectScope( TemplateSpecializationTypeLoc SpecTL = TL.castAs(); - TemplateName Template - = getDerived().TransformTemplateName(SS, - SpecTL.getTypePtr()->getTemplateName(), - SpecTL.getTemplateNameLoc(), - ObjectType, UnqualLookup); + TemplateName Template = getDerived().TransformTemplateName( + SS, SpecTL.getTypePtr()->getTemplateName(), SpecTL.getTemplateNameLoc(), + ObjectType, UnqualLookup, /*AllowInjectedClassName*/true); if (Template.isNull()) return nullptr; @@ -4166,7 +4307,8 @@ TypeSourceInfo *TreeTransform::TransformTSIInObjectScope( = getDerived().RebuildTemplateName(SS, *SpecTL.getTypePtr()->getIdentifier(), SpecTL.getTemplateNameLoc(), - ObjectType, UnqualLookup); + ObjectType, UnqualLookup, + /*AllowInjectedClassName*/true); if (Template.isNull()) return nullptr; @@ -4477,8 +4619,15 @@ TreeTransform::TransformVariableArrayType(TypeLocBuilder &TLB, if (ElementType.isNull()) return QualType(); - ExprResult SizeResult - = getDerived().TransformExpr(T->getSizeExpr()); + ExprResult SizeResult; + { + EnterExpressionEvaluationContext Context(SemaRef, + Sema::PotentiallyEvaluated); + SizeResult = getDerived().TransformExpr(T->getSizeExpr()); + } + if (SizeResult.isInvalid()) + return QualType(); + SizeResult = SemaRef.ActOnFinishFullExpr(SizeResult.get()); if (SizeResult.isInvalid()) return QualType(); @@ -5019,6 +5168,7 @@ QualType TreeTransform::TransformFunctionProtoType( NewTL.setLocalRangeBegin(TL.getLocalRangeBegin()); NewTL.setLParenLoc(TL.getLParenLoc()); NewTL.setRParenLoc(TL.getRParenLoc()); + NewTL.setExceptionSpecRange(TL.getExceptionSpecRange()); NewTL.setLocalRangeEnd(TL.getLocalRangeEnd()); for (unsigned i = 0, e = NewTL.getNumParams(); i != e; ++i) NewTL.setParam(i, ParamDecls[i]); @@ -5161,7 +5311,7 @@ TreeTransform::TransformUnresolvedUsingType(TypeLocBuilder &TLB, QualType Result = TL.getType(); if (getDerived().AlwaysRebuild() || D != T->getDecl()) { - Result = getDerived().RebuildUnresolvedUsingType(D); + Result = getDerived().RebuildUnresolvedUsingType(TL.getNameLoc(), D); if (Result.isNull()) return QualType(); } @@ -5335,6 +5485,37 @@ QualType TreeTransform::TransformAutoType(TypeLocBuilder &TLB, return Result; } +template +QualType 
TreeTransform::TransformDeducedTemplateSpecializationType( + TypeLocBuilder &TLB, DeducedTemplateSpecializationTypeLoc TL) { + const DeducedTemplateSpecializationType *T = TL.getTypePtr(); + + CXXScopeSpec SS; + TemplateName TemplateName = getDerived().TransformTemplateName( + SS, T->getTemplateName(), TL.getTemplateNameLoc()); + if (TemplateName.isNull()) + return QualType(); + + QualType OldDeduced = T->getDeducedType(); + QualType NewDeduced; + if (!OldDeduced.isNull()) { + NewDeduced = getDerived().TransformType(OldDeduced); + if (NewDeduced.isNull()) + return QualType(); + } + + QualType Result = getDerived().RebuildDeducedTemplateSpecializationType( + TemplateName, NewDeduced); + if (Result.isNull()) + return QualType(); + + DeducedTemplateSpecializationTypeLoc NewTL = + TLB.push(Result); + NewTL.setTemplateNameLoc(TL.getTemplateNameLoc()); + + return Result; +} + template QualType TreeTransform::TransformRecordType(TypeLocBuilder &TLB, RecordTypeLoc TL) { @@ -5805,8 +5986,14 @@ TreeTransform::TransformParenType(TypeLocBuilder &TLB, } template -QualType TreeTransform::TransformDependentNameType(TypeLocBuilder &TLB, - DependentNameTypeLoc TL) { +QualType TreeTransform::TransformDependentNameType( + TypeLocBuilder &TLB, DependentNameTypeLoc TL) { + return TransformDependentNameType(TLB, TL, false); +} + +template +QualType TreeTransform::TransformDependentNameType( + TypeLocBuilder &TLB, DependentNameTypeLoc TL, bool DeducedTSTContext) { const DependentNameType *T = TL.getTypePtr(); NestedNameSpecifierLoc QualifierLoc @@ -5819,7 +6006,8 @@ QualType TreeTransform::TransformDependentNameType(TypeLocBuilder &TLB, TL.getElaboratedKeywordLoc(), QualifierLoc, T->getIdentifier(), - TL.getNameLoc()); + TL.getNameLoc(), + DeducedTSTContext); if (Result.isNull()) return QualType(); @@ -5873,12 +6061,10 @@ TransformDependentTemplateSpecializationType(TypeLocBuilder &TLB, NewTemplateArgs)) return QualType(); - QualType Result - = getDerived().RebuildDependentTemplateSpecializationType(T->getKeyword(), - QualifierLoc, - T->getIdentifier(), - TL.getTemplateNameLoc(), - NewTemplateArgs); + QualType Result = getDerived().RebuildDependentTemplateSpecializationType( + T->getKeyword(), QualifierLoc, T->getIdentifier(), + TL.getTemplateNameLoc(), NewTemplateArgs, + /*AllowInjectedClassName*/ false); if (Result.isNull()) return QualType(); @@ -6663,7 +6849,91 @@ StmtResult TreeTransform::TransformCoroutineBodyStmt(CoroutineBodyStmt *S) { // The coroutine body should be re-formed by the caller if necessary. // FIXME: The coroutine body is always rebuilt by ActOnFinishFunctionBody - return getDerived().TransformStmt(S->getBody()); + CoroutineBodyStmt::CtorArgs BodyArgs; + + auto *ScopeInfo = SemaRef.getCurFunction(); + auto *FD = cast(SemaRef.CurContext); + assert(ScopeInfo && !ScopeInfo->CoroutinePromise && + ScopeInfo->NeedsCoroutineSuspends && + ScopeInfo->CoroutineSuspends.first == nullptr && + ScopeInfo->CoroutineSuspends.second == nullptr && + ScopeInfo->CoroutineStmts.empty() && "expected clean scope info"); + + // Set that we have (possibly-invalid) suspend points before we do anything + // that may fail. + ScopeInfo->setNeedsCoroutineSuspends(false); + + // The new CoroutinePromise object needs to be built and put into the current + // FunctionScopeInfo before any transformations or rebuilding occurs. 
+ auto *Promise = S->getPromiseDecl(); + auto *NewPromise = SemaRef.buildCoroutinePromise(FD->getLocation()); + if (!NewPromise) + return StmtError(); + getDerived().transformedLocalDecl(Promise, NewPromise); + ScopeInfo->CoroutinePromise = NewPromise; + StmtResult PromiseStmt = SemaRef.ActOnDeclStmt( + SemaRef.ConvertDeclToDeclGroup(NewPromise), + FD->getLocation(), FD->getLocation()); + assert(!PromiseStmt.isInvalid()); + BodyArgs.Promise = PromiseStmt.get(); + + // Transform the implicit coroutine statements we built during the initial + // parse. + StmtResult InitSuspend = getDerived().TransformStmt(S->getInitSuspendStmt()); + if (InitSuspend.isInvalid()) + return StmtError(); + StmtResult FinalSuspend = + getDerived().TransformStmt(S->getFinalSuspendStmt()); + if (FinalSuspend.isInvalid()) + return StmtError(); + ScopeInfo->setCoroutineSuspends(InitSuspend.get(), FinalSuspend.get()); + assert(isa(InitSuspend.get()) && isa(FinalSuspend.get())); + BodyArgs.InitialSuspend = cast(InitSuspend.get()); + BodyArgs.FinalSuspend = cast(FinalSuspend.get()); + + StmtResult BodyRes = getDerived().TransformStmt(S->getBody()); + if (BodyRes.isInvalid()) + return StmtError(); + BodyArgs.Body = BodyRes.get(); + + if (S->getFallthroughHandler()) { + StmtResult Res = getDerived().TransformStmt(S->getFallthroughHandler()); + if (Res.isInvalid()) + return StmtError(); + BodyArgs.OnFallthrough = Res.get(); + } + + if (S->getExceptionHandler()) { + StmtResult Res = getDerived().TransformStmt(S->getExceptionHandler()); + if (Res.isInvalid()) + return StmtError(); + BodyArgs.OnException = Res.get(); + } + + // Transform any additional statements we may have already built + if (S->getAllocate() && S->getDeallocate()) { + ExprResult AllocRes = getDerived().TransformExpr(S->getAllocate()); + if (AllocRes.isInvalid()) + return StmtError(); + BodyArgs.Allocate = AllocRes.get(); + + ExprResult DeallocRes = getDerived().TransformExpr(S->getDeallocate()); + if (DeallocRes.isInvalid()) + return StmtError(); + BodyArgs.Deallocate = DeallocRes.get(); + } + + Expr *ReturnObject = S->getReturnValueInit(); + if (ReturnObject) { + ExprResult Res = getDerived().TransformInitializer(ReturnObject, + /*NoCopyInit*/false); + if (Res.isInvalid()) + return StmtError(); + BodyArgs.ReturnValue = Res.get(); + } + + // Do a partial rebuild of the coroutine body and stash it in the ScopeInfo + return getDerived().RebuildCoroutineBodyStmt(BodyArgs); } template @@ -6676,7 +6946,8 @@ TreeTransform::TransformCoreturnStmt(CoreturnStmt *S) { // Always rebuild; we don't know if this needs to be injected into a new // context or if the promise type has changed. - return getDerived().RebuildCoreturnStmt(S->getKeywordLoc(), Result.get()); + return getDerived().RebuildCoreturnStmt(S->getKeywordLoc(), Result.get(), + S->isImplicit()); } template @@ -6689,7 +6960,29 @@ TreeTransform::TransformCoawaitExpr(CoawaitExpr *E) { // Always rebuild; we don't know if this needs to be injected into a new // context or if the promise type has changed. 
- return getDerived().RebuildCoawaitExpr(E->getKeywordLoc(), Result.get()); + return getDerived().RebuildCoawaitExpr(E->getKeywordLoc(), Result.get(), + E->isImplicit()); +} + +template +ExprResult +TreeTransform::TransformDependentCoawaitExpr(DependentCoawaitExpr *E) { + ExprResult OperandResult = getDerived().TransformInitializer(E->getOperand(), + /*NotCopyInit*/ false); + if (OperandResult.isInvalid()) + return ExprError(); + + ExprResult LookupResult = getDerived().TransformUnresolvedLookupExpr( + E->getOperatorCoawaitLookup()); + + if (LookupResult.isInvalid()) + return ExprError(); + + // Always rebuild; we don't know if this needs to be injected into a new + // context or if the promise type has changed. + return getDerived().RebuildDependentCoawaitExpr( + E->getKeywordLoc(), OperandResult.get(), + cast(LookupResult.get())); } template @@ -7233,8 +7526,12 @@ StmtResult TreeTransform::TransformOMPExecutableDirective( StmtResult Body; { Sema::CompoundScopeRAII CompoundScope(getSema()); - Body = getDerived().TransformStmt( - cast(D->getAssociatedStmt())->getCapturedStmt()); + int ThisCaptureLevel = + Sema::getOpenMPCaptureLevels(D->getDirectiveKind()); + Stmt *CS = D->getAssociatedStmt(); + while (--ThisCaptureLevel >= 0) + CS = cast(CS)->getCapturedStmt(); + Body = getDerived().TransformStmt(CS); } AssociatedStmt = getDerived().getSema().ActOnOpenMPRegionEnd(Body, TClauses); @@ -7727,6 +8024,66 @@ StmtResult TreeTransform::TransformOMPTeamsDistributeParallelForDirecti return Res; } +template +StmtResult TreeTransform::TransformOMPTargetTeamsDirective( + OMPTargetTeamsDirective *D) { + DeclarationNameInfo DirName; + getDerived().getSema().StartOpenMPDSABlock(OMPD_target_teams, DirName, + nullptr, D->getLocStart()); + auto Res = getDerived().TransformOMPExecutableDirective(D); + getDerived().getSema().EndOpenMPDSABlock(Res.get()); + return Res; +} + +template +StmtResult TreeTransform::TransformOMPTargetTeamsDistributeDirective( + OMPTargetTeamsDistributeDirective *D) { + DeclarationNameInfo DirName; + getDerived().getSema().StartOpenMPDSABlock(OMPD_target_teams_distribute, + DirName, nullptr, D->getLocStart()); + auto Res = getDerived().TransformOMPExecutableDirective(D); + getDerived().getSema().EndOpenMPDSABlock(Res.get()); + return Res; +} + +template +StmtResult +TreeTransform::TransformOMPTargetTeamsDistributeParallelForDirective( + OMPTargetTeamsDistributeParallelForDirective *D) { + DeclarationNameInfo DirName; + getDerived().getSema().StartOpenMPDSABlock( + OMPD_target_teams_distribute_parallel_for, DirName, nullptr, + D->getLocStart()); + auto Res = getDerived().TransformOMPExecutableDirective(D); + getDerived().getSema().EndOpenMPDSABlock(Res.get()); + return Res; +} + +template +StmtResult TreeTransform:: + TransformOMPTargetTeamsDistributeParallelForSimdDirective( + OMPTargetTeamsDistributeParallelForSimdDirective *D) { + DeclarationNameInfo DirName; + getDerived().getSema().StartOpenMPDSABlock( + OMPD_target_teams_distribute_parallel_for_simd, DirName, nullptr, + D->getLocStart()); + auto Res = getDerived().TransformOMPExecutableDirective(D); + getDerived().getSema().EndOpenMPDSABlock(Res.get()); + return Res; +} + +template +StmtResult +TreeTransform::TransformOMPTargetTeamsDistributeSimdDirective( + OMPTargetTeamsDistributeSimdDirective *D) { + DeclarationNameInfo DirName; + getDerived().getSema().StartOpenMPDSABlock( + OMPD_target_teams_distribute_simd, DirName, nullptr, D->getLocStart()); + auto Res = getDerived().TransformOMPExecutableDirective(D); + 
getDerived().getSema().EndOpenMPDSABlock(Res.get()); + return Res; +} + //===----------------------------------------------------------------------===// // OpenMP clause transformation @@ -8753,12 +9110,18 @@ TreeTransform::TransformMemberExpr(MemberExpr *E) { // base (and therefore couldn't do the check) and a // nested-name-qualifier (and therefore could do the lookup). NamedDecl *FirstQualifierInScope = nullptr; + DeclarationNameInfo MemberNameInfo = E->getMemberNameInfo(); + if (MemberNameInfo.getName()) { + MemberNameInfo = getDerived().TransformDeclarationNameInfo(MemberNameInfo); + if (!MemberNameInfo.getName()) + return ExprError(); + } return getDerived().RebuildMemberExpr(Base.get(), FakeOperatorLoc, E->isArrow(), QualifierLoc, TemplateKWLoc, - E->getMemberNameInfo(), + MemberNameInfo, Member, FoundDecl, (E->hasExplicitTemplateArgs() @@ -9363,7 +9726,8 @@ template ExprResult TreeTransform::TransformCXXFunctionalCastExpr( CXXFunctionalCastExpr *E) { - TypeSourceInfo *Type = getDerived().TransformType(E->getTypeInfoAsWritten()); + TypeSourceInfo *Type = + getDerived().TransformTypeWithDeducedTST(E->getTypeInfoAsWritten()); if (!Type) return ExprError(); @@ -9552,8 +9916,8 @@ template ExprResult TreeTransform::TransformCXXNewExpr(CXXNewExpr *E) { // Transform the type that we're allocating - TypeSourceInfo *AllocTypeInfo - = getDerived().TransformType(E->getAllocatedTypeSourceInfo()); + TypeSourceInfo *AllocTypeInfo = + getDerived().TransformTypeWithDeducedTST(E->getAllocatedTypeSourceInfo()); if (!AllocTypeInfo) return ExprError(); @@ -9784,44 +10148,72 @@ TreeTransform::TransformCXXPseudoDestructorExpr( Destroyed); } -template -ExprResult -TreeTransform::TransformUnresolvedLookupExpr( - UnresolvedLookupExpr *Old) { - LookupResult R(SemaRef, Old->getName(), Old->getNameLoc(), - Sema::LookupOrdinaryName); - +template +bool TreeTransform::TransformOverloadExprDecls(OverloadExpr *Old, + bool RequiresADL, + LookupResult &R) { // Transform all the decls. - for (UnresolvedLookupExpr::decls_iterator I = Old->decls_begin(), - E = Old->decls_end(); I != E; ++I) { - NamedDecl *InstD = static_cast( - getDerived().TransformDecl(Old->getNameLoc(), - *I)); + bool AllEmptyPacks = true; + for (auto *OldD : Old->decls()) { + Decl *InstD = getDerived().TransformDecl(Old->getNameLoc(), OldD); if (!InstD) { // Silently ignore these if a UsingShadowDecl instantiated to nothing. // This can happen because of dependent hiding. - if (isa(*I)) + if (isa(OldD)) continue; else { R.clear(); - return ExprError(); + return true; } } + // Expand using pack declarations. + NamedDecl *SingleDecl = cast(InstD); + ArrayRef Decls = SingleDecl; + if (auto *UPD = dyn_cast(InstD)) + Decls = UPD->expansions(); + // Expand using declarations. - if (isa(InstD)) { - UsingDecl *UD = cast(InstD); - for (auto *I : UD->shadows()) - R.addDecl(I); - continue; + for (auto *D : Decls) { + if (auto *UD = dyn_cast(D)) { + for (auto *SD : UD->shadows()) + R.addDecl(SD); + } else { + R.addDecl(D); + } } - R.addDecl(InstD); + AllEmptyPacks &= Decls.empty(); + }; + + // C++ [temp.res]/8.4.2: + // The program is ill-formed, no diagnostic required, if [...] 
lookup for + // a name in the template definition found a using-declaration, but the + // lookup in the corresponding scope in the instantiation does not find + // any declarations because the using-declaration was a pack expansion and + // the corresponding pack is empty + if (AllEmptyPacks && !RequiresADL) { + getSema().Diag(Old->getNameLoc(), diag::err_using_pack_expansion_empty) + << isa<UnresolvedMemberExpr>(Old) << Old->getNameInfo().getName(); + return true; } // Resolve a kind, but don't do any further analysis. If it's // ambiguous, the callee needs to deal with it. R.resolveKind(); + return false; +} + +template<typename Derived> +ExprResult +TreeTransform<Derived>::TransformUnresolvedLookupExpr( UnresolvedLookupExpr *Old) { LookupResult R(SemaRef, Old->getName(), Old->getNameLoc(), Sema::LookupOrdinaryName); + + // Transform the declaration set. + if (TransformOverloadExprDecls(Old, Old->requiresADL(), R)) + return ExprError(); // Rebuild the nested-name qualifier, if present. CXXScopeSpec SS; @@ -10236,7 +10628,8 @@ template<typename Derived> ExprResult TreeTransform<Derived>::TransformCXXTemporaryObjectExpr( CXXTemporaryObjectExpr *E) { - TypeSourceInfo *T = getDerived().TransformType(E->getTypeSourceInfo()); + TypeSourceInfo *T = + getDerived().TransformTypeWithDeducedTST(E->getTypeSourceInfo()); if (!T) return ExprError(); @@ -10356,6 +10749,18 @@ TreeTransform<Derived>::TransformLambdaExpr(LambdaExpr *E) { LSI->CallOperator = NewCallOperator; + for (unsigned I = 0, NumParams = NewCallOperator->getNumParams(); + I != NumParams; ++I) { + auto *P = NewCallOperator->getParamDecl(I); + if (P->hasUninstantiatedDefaultArg()) { + EnterExpressionEvaluationContext Eval( + getSema(), Sema::PotentiallyEvaluatedIfUsed, P); + ExprResult R = getDerived().TransformExpr( + E->getCallOperator()->getParamDecl(I)->getDefaultArg()); + P->setDefaultArg(R.get()); + } + } + getDerived().transformAttrs(E->getCallOperator(), NewCallOperator); getDerived().transformedLocalDecl(E->getCallOperator(), NewCallOperator); @@ -10521,7 +10926,8 @@ template<typename Derived> ExprResult TreeTransform<Derived>::TransformCXXUnresolvedConstructExpr( CXXUnresolvedConstructExpr *E) { - TypeSourceInfo *T = getDerived().TransformType(E->getTypeSourceInfo()); + TypeSourceInfo *T = + getDerived().TransformTypeWithDeducedTST(E->getTypeSourceInfo()); if (!T) return ExprError(); @@ -10677,35 +11083,9 @@ TreeTransform<Derived>::TransformUnresolvedMemberExpr(UnresolvedMemberExpr *Old) LookupResult R(SemaRef, Old->getMemberNameInfo(), Sema::LookupOrdinaryName); - // Transform all the decls. - for (UnresolvedMemberExpr::decls_iterator I = Old->decls_begin(), - E = Old->decls_end(); I != E; ++I) { - NamedDecl *InstD = static_cast<NamedDecl*>( - getDerived().TransformDecl(Old->getMemberLoc(), - *I)); - if (!InstD) { - // Silently ignore these if a UsingShadowDecl instantiated to nothing. - // This can happen because of dependent hiding. - if (isa<UsingShadowDecl>(*I)) - continue; - else { - R.clear(); - return ExprError(); - } - } - - // Expand using declarations. - if (isa<UsingDecl>(InstD)) { - UsingDecl *UD = cast<UsingDecl>(InstD); - for (auto *I : UD->shadows()) - R.addDecl(I); - continue; - } - - R.addDecl(InstD); - } - - R.resolveKind(); + // Transform the declaration set. + if (TransformOverloadExprDecls(Old, /*RequiresADL*/false, R)) + return ExprError(); // Determine the naming class.
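
The AllEmptyPacks check implements the [temp.res] wording quoted above: with P0195R2 pack-expansion using-declarations (added elsewhere in this patch), lookup can legitimately find a using-declaration at definition time yet zero declarations at instantiation time. The canonical C++17 source pattern looks like this (Overloaded is a made-up helper):

    template <typename... Bases>
    struct Overloaded : Bases... {
      using Bases::operator()...;  // expands to zero using-declarations when
                                   // sizeof...(Bases) == 0
    };

    int main() {
      auto f = [](int n) { return n + 1; };
      Overloaded<decltype(f)> one{f};
      Overloaded<> empty;          // lookup of operator() on 'empty' would
      (void)empty;                 // find nothing: the diagnosed situation
      return one(41) - 42;
    }
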
if (Old->getNamingClass()) { @@ -11821,21 +12201,48 @@ QualType TreeTransform::RebuildFunctionNoProtoType(QualType T) { } template -QualType TreeTransform::RebuildUnresolvedUsingType(Decl *D) { +QualType TreeTransform::RebuildUnresolvedUsingType(SourceLocation Loc, + Decl *D) { assert(D && "no decl found"); if (D->isInvalidDecl()) return QualType(); // FIXME: Doesn't account for ObjCInterfaceDecl! TypeDecl *Ty; - if (isa(D)) { - UsingDecl *Using = cast(D); + if (auto *UPD = dyn_cast(D)) { + // A valid resolved using typename pack expansion decl can have multiple + // UsingDecls, but they must each have exactly one type, and it must be + // the same type in every case. But we must have at least one expansion! + if (UPD->expansions().empty()) { + getSema().Diag(Loc, diag::err_using_pack_expansion_empty) + << UPD->isCXXClassMember() << UPD; + return QualType(); + } + + // We might still have some unresolved types. Try to pick a resolved type + // if we can. The final instantiation will check that the remaining + // unresolved types instantiate to the type we pick. + QualType FallbackT; + QualType T; + for (auto *E : UPD->expansions()) { + QualType ThisT = RebuildUnresolvedUsingType(Loc, E); + if (ThisT.isNull()) + continue; + else if (ThisT->getAs()) + FallbackT = ThisT; + else if (T.isNull()) + T = ThisT; + else + assert(getSema().Context.hasSameType(ThisT, T) && + "mismatched resolved types in using pack expansion"); + } + return T.isNull() ? FallbackT : T; + } else if (auto *Using = dyn_cast(D)) { assert(Using->hasTypename() && "UnresolvedUsingTypenameDecl transformed to non-typename using"); // A valid resolved using typename decl points to exactly one type decl. assert(++Using->shadow_begin() == Using->shadow_end()); Ty = cast((*Using->shadow_begin())->getTargetDecl()); - } else { assert(isa(D) && "UnresolvedUsingTypenameDecl transformed to non-using decl"); @@ -11906,7 +12313,8 @@ TreeTransform::RebuildTemplateName(CXXScopeSpec &SS, const IdentifierInfo &Name, SourceLocation NameLoc, QualType ObjectType, - NamedDecl *FirstQualifierInScope) { + NamedDecl *FirstQualifierInScope, + bool AllowInjectedClassName) { UnqualifiedId TemplateName; TemplateName.setIdentifier(&Name, NameLoc); Sema::TemplateTy Template; @@ -11915,7 +12323,7 @@ TreeTransform::RebuildTemplateName(CXXScopeSpec &SS, SS, TemplateKWLoc, TemplateName, ParsedType::make(ObjectType), /*EnteringContext=*/false, - Template); + Template, AllowInjectedClassName); return Template.get(); } @@ -11924,7 +12332,8 @@ TemplateName TreeTransform::RebuildTemplateName(CXXScopeSpec &SS, OverloadedOperatorKind Operator, SourceLocation NameLoc, - QualType ObjectType) { + QualType ObjectType, + bool AllowInjectedClassName) { UnqualifiedId Name; // FIXME: Bogus location information. 
SourceLocation SymbolLocations[3] = { NameLoc, NameLoc, NameLoc }; @@ -11935,7 +12344,7 @@ TreeTransform::RebuildTemplateName(CXXScopeSpec &SS, SS, TemplateKWLoc, Name, ParsedType::make(ObjectType), /*EnteringContext=*/false, - Template); + Template, AllowInjectedClassName); return Template.get(); } diff --git a/tools/clang/lib/Serialization/ASTCommon.cpp b/tools/clang/lib/Serialization/ASTCommon.cpp index d019df1..351e7b6 100644 --- a/tools/clang/lib/Serialization/ASTCommon.cpp +++ b/tools/clang/lib/Serialization/ASTCommon.cpp @@ -153,9 +153,6 @@ serialization::TypeIdxFromBuiltin(const BuiltinType *BT) { case BuiltinType::OCLQueue: ID = PREDEF_TYPE_QUEUE_ID; break; - case BuiltinType::OCLNDRange: - ID = PREDEF_TYPE_NDRANGE_ID; - break; case BuiltinType::OCLReserveID: ID = PREDEF_TYPE_RESERVE_ID_ID; break; @@ -260,6 +257,7 @@ bool serialization::isRedeclarableDeclKind(unsigned Kind) { case Decl::VarTemplateSpecialization: case Decl::VarTemplatePartialSpecialization: case Decl::Function: + case Decl::CXXDeductionGuide: case Decl::CXXMethod: case Decl::CXXConstructor: case Decl::CXXDestructor: @@ -291,6 +289,7 @@ bool serialization::isRedeclarableDeclKind(unsigned Kind) { case Decl::NonTypeTemplateParm: case Decl::TemplateTemplateParm: case Decl::Using: + case Decl::UsingPack: case Decl::ObjCMethod: case Decl::ObjCCategory: case Decl::ObjCCategoryImpl: diff --git a/tools/clang/lib/Serialization/ASTReader.cpp b/tools/clang/lib/Serialization/ASTReader.cpp index 43b9529..104e923 100644 --- a/tools/clang/lib/Serialization/ASTReader.cpp +++ b/tools/clang/lib/Serialization/ASTReader.cpp @@ -26,6 +26,7 @@ #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/NestedNameSpecifier.h" +#include "clang/AST/ODRHash.h" #include "clang/AST/RawCommentList.h" #include "clang/AST/Type.h" #include "clang/AST/TypeLocVisitor.h" @@ -72,6 +73,7 @@ #include "llvm/Bitcode/BitstreamReader.h" #include "llvm/Support/Compression.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" @@ -481,7 +483,7 @@ bool PCHValidator::ReadDiagnosticOptions( // Note: ModuleMgr.rbegin() may not be the current module, but it must be in // the transitive closure of its imports, since unrelated modules cannot be // imported until after this module finishes validation. 
- ModuleFile *TopImport = *ModuleMgr.rbegin(); + ModuleFile *TopImport = &*ModuleMgr.rbegin(); while (!TopImport->ImportedBy.empty()) TopImport = TopImport->ImportedBy[0]; if (TopImport->Kind != MK_ImplicitModule) @@ -735,7 +737,7 @@ ASTSelectorLookupTrait::ReadKeyDataLength(const unsigned char*& d) { return std::make_pair(KeyLen, DataLen); } -ASTSelectorLookupTrait::internal_key_type +ASTSelectorLookupTrait::internal_key_type ASTSelectorLookupTrait::ReadKey(const unsigned char* d, unsigned) { using namespace llvm::support; SelectorTable &SelTable = Reader.getContext().Selectors; @@ -756,8 +758,8 @@ ASTSelectorLookupTrait::ReadKey(const unsigned char* d, unsigned) { return SelTable.getSelector(N, Args.data()); } -ASTSelectorLookupTrait::data_type -ASTSelectorLookupTrait::ReadData(Selector, const unsigned char* d, +ASTSelectorLookupTrait::data_type +ASTSelectorLookupTrait::ReadData(Selector, const unsigned char* d, unsigned DataLen) { using namespace llvm::support; @@ -943,6 +945,10 @@ DeclarationNameKey::DeclarationNameKey(DeclarationName Name) case DeclarationName::CXXLiteralOperatorName: Data = (uint64_t)Name.getCXXLiteralIdentifier(); break; + case DeclarationName::CXXDeductionGuideName: + Data = (uint64_t)Name.getCXXDeductionGuideTemplate() + ->getDeclName().getAsIdentifierInfo(); + break; case DeclarationName::CXXConstructorName: case DeclarationName::CXXDestructorName: case DeclarationName::CXXConversionFunctionName: @@ -959,6 +965,7 @@ unsigned DeclarationNameKey::getHash() const { switch (Kind) { case DeclarationName::Identifier: case DeclarationName::CXXLiteralOperatorName: + case DeclarationName::CXXDeductionGuideName: ID.AddString(((IdentifierInfo*)Data)->getName()); break; case DeclarationName::ObjCZeroArgSelector: @@ -1002,6 +1009,8 @@ ASTDeclContextNameLookupTrait::ReadKey(const unsigned char *d, unsigned) { uint64_t Data; switch (Kind) { case DeclarationName::Identifier: + case DeclarationName::CXXLiteralOperatorName: + case DeclarationName::CXXDeductionGuideName: Data = (uint64_t)Reader.getLocalIdentifier( F, endian::readNext(d)); break; @@ -1016,10 +1025,6 @@ ASTDeclContextNameLookupTrait::ReadKey(const unsigned char *d, unsigned) { case DeclarationName::CXXOperatorName: Data = *d++; // OverloadedOperatorKind break; - case DeclarationName::CXXLiteralOperatorName: - Data = (uint64_t)Reader.getLocalIdentifier( - F, endian::readNext(d)); - break; case DeclarationName::CXXConstructorName: case DeclarationName::CXXDestructorName: case DeclarationName::CXXConversionFunctionName: @@ -1103,7 +1108,7 @@ bool ASTReader::ReadVisibleDeclContextStorage(ModuleFile &M, return false; } -void ASTReader::Error(StringRef Msg) { +void ASTReader::Error(StringRef Msg) const { Error(diag::err_fe_pch_malformed, Msg); if (Context.getLangOpts().Modules && !Diags.isDiagnosticInFlight() && !PP.getHeaderSearchInfo().getModuleCachePath().empty()) { @@ -1113,7 +1118,7 @@ void ASTReader::Error(StringRef Msg) { } void ASTReader::Error(unsigned DiagID, - StringRef Arg1, StringRef Arg2) { + StringRef Arg1, StringRef Arg2) const { if (Diags.isDiagnosticInFlight()) Diags.SetDelayedDiagnostic(DiagID, Arg1, Arg2); else @@ -1196,7 +1201,7 @@ bool ASTReader::ReadSourceManagerBlock(ModuleFile &F) { RecordData Record; while (true) { llvm::BitstreamEntry E = SLocEntryCursor.advanceSkippingSubblocks(); - + switch (E.Kind) { case llvm::BitstreamEntry::SubBlock: // Handled for us already. 
case llvm::BitstreamEntry::Error: @@ -1208,7 +1213,7 @@ bool ASTReader::ReadSourceManagerBlock(ModuleFile &F) { // The interesting case. break; } - + // Read a record. Record.clear(); StringRef Blob; @@ -1278,10 +1283,15 @@ bool ASTReader::ReadSLocEntry(int ID) { unsigned RecCode = SLocEntryCursor.readRecord(Code, Record, &Blob); if (RecCode == SM_SLOC_BUFFER_BLOB_COMPRESSED) { + if (!llvm::zlib::isAvailable()) { + Error("zlib is not available"); + return nullptr; + } SmallString<0> Uncompressed; - if (llvm::zlib::uncompress(Blob, Uncompressed, Record[0]) != - llvm::zlib::StatusOK) { - Error("could not decompress embedded file contents"); + if (llvm::Error E = + llvm::zlib::uncompress(Blob, Uncompressed, Record[0])) { + Error("could not decompress embedded file contents: " + + llvm::toString(std::move(E))); return nullptr; } return llvm::MemoryBuffer::getMemBufferCopy(Uncompressed, Name); @@ -1304,7 +1314,7 @@ bool ASTReader::ReadSLocEntry(int ID) { Error("incorrectly-formatted source location entry in AST file"); return true; } - + RecordData Record; StringRef Blob; switch (SLocEntryCursor.readRecord(Entry.ID, Record, &Blob)) { @@ -1420,7 +1430,7 @@ std::pair ASTReader::getModuleImportLoc(int ID) { SourceLocation ASTReader::getImportLocation(ModuleFile *F) { if (F->ImportLoc.isValid()) return F->ImportLoc; - + // Otherwise we have a PCH. It's considered to be "imported" at the first // location of its includer. if (F->ImportedBy.empty() || !F->ImportedBy[0]) { @@ -1482,7 +1492,7 @@ MacroInfo *ASTReader::ReadMacroRecord(ModuleFile &F, uint64_t Offset) { // be able to reseek within the block and read entries. unsigned Flags = BitstreamCursor::AF_DontPopBlockAtEnd; llvm::BitstreamEntry Entry = Stream.advanceSkippingSubblocks(Flags); - + switch (Entry.Kind) { case llvm::BitstreamEntry::SubBlock: // Handled for us already. case llvm::BitstreamEntry::Error: @@ -1574,13 +1584,17 @@ MacroInfo *ASTReader::ReadMacroRecord(ModuleFile &F, uint64_t Offset) { } } -PreprocessedEntityID -ASTReader::getGlobalPreprocessedEntityID(ModuleFile &M, unsigned LocalID) const { - ContinuousRangeMap::const_iterator +PreprocessedEntityID +ASTReader::getGlobalPreprocessedEntityID(ModuleFile &M, + unsigned LocalID) const { + if (!M.ModuleOffsetMap.empty()) + ReadModuleOffsetMap(M); + + ContinuousRangeMap::const_iterator I = M.PreprocessedEntityRemap.find(LocalID - NUM_PREDEF_PP_ENTITY_IDS); - assert(I != M.PreprocessedEntityRemap.end() + assert(I != M.PreprocessedEntityRemap.end() && "Invalid index into preprocessed entity index remap"); - + return LocalID + I->second; } @@ -1588,21 +1602,21 @@ unsigned HeaderFileInfoTrait::ComputeHash(internal_key_ref ikey) { return llvm::hash_combine(ikey.Size, ikey.ModTime); } -HeaderFileInfoTrait::internal_key_type +HeaderFileInfoTrait::internal_key_type HeaderFileInfoTrait::GetInternalKey(const FileEntry *FE) { internal_key_type ikey = {FE->getSize(), M.HasTimestamps ? FE->getModificationTime() : 0, FE->getName(), /*Imported*/ false}; return ikey; } - + bool HeaderFileInfoTrait::EqualKey(internal_key_ref a, internal_key_ref b) { if (a.Size != b.Size || (a.ModTime && b.ModTime && a.ModTime != b.ModTime)) return false; if (llvm::sys::path::is_absolute(a.Filename) && a.Filename == b.Filename) return true; - + // Determine whether the actual files are equivalent. 
FileManager &FileMgr = Reader.getFileManager(); auto GetFile = [&](const internal_key_type &Key) -> const FileEntry* { @@ -1618,7 +1632,7 @@ bool HeaderFileInfoTrait::EqualKey(internal_key_ref a, internal_key_ref b) { const FileEntry *FEB = GetFile(b); return FEA && FEA == FEB; } - + std::pair HeaderFileInfoTrait::ReadKeyDataLength(const unsigned char*& d) { using namespace llvm::support; @@ -1638,7 +1652,7 @@ HeaderFileInfoTrait::ReadKey(const unsigned char *d, unsigned) { return ikey; } -HeaderFileInfoTrait::data_type +HeaderFileInfoTrait::data_type HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d, unsigned DataLen) { const unsigned char *End = d + DataLen; @@ -1658,7 +1672,7 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d, M, endian::readNext(d)); if (unsigned FrameworkOffset = endian::readNext(d)) { - // The framework offset is 1 greater than the actual offset, + // The framework offset is 1 greater than the actual offset, // since 0 is used as an indicator for "no framework name". StringRef FrameworkName(FrameworkStrings + FrameworkOffset - 1); HFI.Framework = HS->getUniqueFrameworkName(FrameworkName); @@ -1707,20 +1721,20 @@ void ASTReader::ReadDefinedMacros() { // Note that we are loading defined macros. Deserializing Macros(this); - for (auto &I : llvm::reverse(ModuleMgr)) { - BitstreamCursor &MacroCursor = I->MacroCursor; + for (ModuleFile &I : llvm::reverse(ModuleMgr)) { + BitstreamCursor &MacroCursor = I.MacroCursor; // If there was no preprocessor block, skip this file. if (MacroCursor.getBitcodeBytes().empty()) continue; BitstreamCursor Cursor = MacroCursor; - Cursor.JumpToBit(I->MacroStartOffset); + Cursor.JumpToBit(I.MacroStartOffset); RecordData Record; while (true) { llvm::BitstreamEntry E = Cursor.advanceSkippingSubblocks(); - + switch (E.Kind) { case llvm::BitstreamEntry::SubBlock: // Handled for us already. case llvm::BitstreamEntry::Error: @@ -1728,21 +1742,21 @@ void ASTReader::ReadDefinedMacros() { return; case llvm::BitstreamEntry::EndBlock: goto NextCursor; - + case llvm::BitstreamEntry::Record: Record.clear(); switch (Cursor.readRecord(E.ID, Record)) { default: // Default behavior: ignore. break; - + case PP_MACRO_OBJECT_LIKE: case PP_MACRO_FUNCTION_LIKE: { - IdentifierInfo *II = getLocalIdentifier(*I, Record[0]); + IdentifierInfo *II = getLocalIdentifier(I, Record[0]); if (II->isOutOfDate()) updateOutOfDateIdentifier(*II); break; } - + case PP_TOKEN: // Ignore tokens. break; @@ -1794,7 +1808,7 @@ namespace { IdTable->find_hashed(Name, NameHash, &Trait); if (Pos == IdTable->end()) return false; - + // Dereferencing the iterator has the effect of building the // IdentifierInfo node and populating it with the various // declarations it needs. @@ -1802,7 +1816,7 @@ namespace { Found = *Pos; return true; } - + // \brief Retrieve the identifier info found within the module // files. IdentifierInfo *getIdentifierInfo() const { return Found; } @@ -1838,7 +1852,7 @@ void ASTReader::updateOutOfDateIdentifier(IdentifierInfo &II) { void ASTReader::markIdentifierUpToDate(IdentifierInfo *II) { if (!II) return; - + II->setOutOfDate(false); // Update the generation for this identifier. 
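
HeaderFileInfoTrait::EqualKey in the hunk above compares header keys by size, then by modification time only when both sides track one, then by identical absolute path before falling back to FileManager resolution. Roughly, in standalone form (a hypothetical mirror, not clang's actual types):

    #include <ctime>
    #include <string>

    // Hypothetical stand-in for the trait's internal_key_type.
    struct HeaderKey {
      long long Size;
      std::time_t ModTime;   // 0 when the AST file was built without timestamps
      std::string Filename;
    };

    // Mirrors the short-circuit order of EqualKey: cheap metadata first, an
    // identical absolute path next, FileManager resolution (omitted here) last.
    bool equalKey(const HeaderKey &a, const HeaderKey &b) {
      if (a.Size != b.Size || (a.ModTime && b.ModTime && a.ModTime != b.ModTime))
        return false;
      if (!a.Filename.empty() && a.Filename.front() == '/' &&  // crude "absolute"
          a.Filename == b.Filename)
        return true;
      return false;  // real code: do both names resolve to the same FileEntry?
    }

    int main() {
      HeaderKey a{128, 0, "/usr/include/foo.h"};
      HeaderKey b{128, 0, "/usr/include/foo.h"};
      return equalKey(a, b) ? 0 : 1;
    }
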
@@ -2000,7 +2014,7 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) { BitstreamCursor &Cursor = F.InputFilesCursor; SavedStreamPosition SavedPosition(Cursor); Cursor.JumpToBit(F.InputFileOffsets[ID-1]); - + InputFileInfo FI = readInputFileInfo(F, ID); off_t StoredSize = FI.StoredSize; time_t StoredTime = FI.StoredTime; @@ -2158,7 +2172,7 @@ ASTReader::ASTReadResult ASTReader::ReadOptionsBlock( ASTReadResult Result = Success; while (true) { llvm::BitstreamEntry Entry = Stream.advance(); - + switch (Entry.Kind) { case llvm::BitstreamEntry::Error: case llvm::BitstreamEntry::SubBlock: @@ -2246,7 +2260,7 @@ ASTReader::ReadControlBlock(ModuleFile &F, unsigned NumUserInputs = 0; while (true) { llvm::BitstreamEntry Entry = Stream.advance(); - + switch (Entry.Kind) { case llvm::BitstreamEntry::Error: Error("malformed block record in AST file"); @@ -2349,7 +2363,7 @@ ASTReader::ReadControlBlock(ModuleFile &F, return Failure; } continue; - + default: if (Stream.SkipBlock()) { Error("malformed block record in AST file"); @@ -2357,7 +2371,7 @@ ASTReader::ReadControlBlock(ModuleFile &F, } continue; } - + case llvm::BitstreamEntry::Record: // The interesting case. break; @@ -2409,7 +2423,7 @@ ASTReader::ReadControlBlock(ModuleFile &F, break; case IMPORTS: { - // Load each of the imported PCH files. + // Load each of the imported PCH files. unsigned Idx = 0, N = Record.size(); while (Idx < N) { // Read information about the AST file. @@ -2513,6 +2527,7 @@ ASTReader::ReadControlBlock(ModuleFile &F, F.InputFileOffsets = (const llvm::support::unaligned_uint64_t *)Blob.data(); F.InputFilesLoaded.resize(NumInputs); + F.NumUserInputFiles = NumUserInputs; break; } } @@ -2531,7 +2546,7 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { RecordData Record; while (true) { llvm::BitstreamEntry Entry = Stream.advance(); - + switch (Entry.Kind) { case llvm::BitstreamEntry::Error: Error("error at end of module block in AST file"); @@ -2546,7 +2561,7 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { if (DC->hasExternalLexicalStorage() && !getContext().getLangOpts().CPlusPlus) DC->setMustBuildLookupTable(); - + return Success; } case llvm::BitstreamEntry::SubBlock: @@ -2569,7 +2584,7 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { F.MacroCursor = Stream; if (!PP.getExternalSource()) PP.setExternalSource(this); - + if (Stream.SkipBlock() || ReadBlockAbbrevs(F.MacroCursor, PREPROCESSOR_BLOCK_ID)) { Error("malformed block record in AST file"); @@ -2577,7 +2592,7 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { } F.MacroStartOffset = F.MacroCursor.GetCurrentBitNo(); break; - + case PREPROCESSOR_DETAIL_BLOCK_ID: F.PreprocessorDetailCursor = Stream; if (Stream.SkipBlock() || @@ -2588,23 +2603,24 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { } F.PreprocessorDetailStartOffset = F.PreprocessorDetailCursor.GetCurrentBitNo(); - + if (!PP.getPreprocessingRecord()) PP.createPreprocessingRecord(); if (!PP.getPreprocessingRecord()->getExternalSource()) PP.getPreprocessingRecord()->SetExternalSource(*this); break; - + case SOURCE_MANAGER_BLOCK_ID: if (ReadSourceManagerBlock(F)) return Failure; break; - + case SUBMODULE_BLOCK_ID: - if (ASTReadResult Result = ReadSubmoduleBlock(F, ClientLoadCapabilities)) + if (ASTReadResult Result = + ReadSubmoduleBlock(F, ClientLoadCapabilities)) return Result; break; - + case COMMENTS_BLOCK_ID: { BitstreamCursor C = Stream; if 
(Stream.SkipBlock() || @@ -2615,7 +2631,7 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { CommentsCursors.push_back(std::make_pair(C, &F)); break; } - + default: if (Stream.SkipBlock()) { Error("malformed block record in AST file"); @@ -2624,7 +2640,7 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { break; } continue; - + case llvm::BitstreamEntry::Record: // The interesting case. break; @@ -2646,21 +2662,21 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { F.LocalNumTypes = Record[0]; unsigned LocalBaseTypeIndex = Record[1]; F.BaseTypeIndex = getTotalNumTypes(); - + if (F.LocalNumTypes > 0) { // Introduce the global -> local mapping for types within this module. GlobalTypeMap.insert(std::make_pair(getTotalNumTypes(), &F)); - + // Introduce the local -> global mapping for types within this module. F.TypeRemap.insertOrReplace( - std::make_pair(LocalBaseTypeIndex, + std::make_pair(LocalBaseTypeIndex, F.BaseTypeIndex - LocalBaseTypeIndex)); TypesLoaded.resize(TypesLoaded.size() + F.LocalNumTypes); } break; } - + case DECL_OFFSET: { if (F.LocalNumDecls != 0) { Error("duplicate DECL_OFFSET record in AST file"); @@ -2670,18 +2686,18 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { F.LocalNumDecls = Record[0]; unsigned LocalBaseDeclID = Record[1]; F.BaseDeclID = getTotalNumDecls(); - + if (F.LocalNumDecls > 0) { - // Introduce the global -> local mapping for declarations within this + // Introduce the global -> local mapping for declarations within this // module. GlobalDeclMap.insert( std::make_pair(getTotalNumDecls() + NUM_PREDEF_DECL_IDS, &F)); - + // Introduce the local -> global mapping for declarations within this // module. F.DeclRemap.insertOrReplace( std::make_pair(LocalBaseDeclID, F.BaseDeclID - LocalBaseDeclID)); - + // Introduce the global -> local mapping for declarations within this // module. F.GlobalToLocalDeclIDs[&F] = LocalBaseDeclID; @@ -2690,7 +2706,7 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { } break; } - + case TU_UPDATE_LEXICAL: { DeclContext *TU = Context.getTranslationUnitDecl(); LexicalContents Contents( @@ -2722,7 +2738,7 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { (const unsigned char *)F.IdentifierTableData + sizeof(uint32_t), (const unsigned char *)F.IdentifierTableData, ASTIdentifierLookupTrait(*this, F)); - + PP.getIdentifierTable().setExternalIdentifierLookup(this); } break; @@ -2736,13 +2752,13 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { F.LocalNumIdentifiers = Record[0]; unsigned LocalBaseIdentifierID = Record[1]; F.BaseIdentifierID = getTotalNumIdentifiers(); - + if (F.LocalNumIdentifiers > 0) { // Introduce the global -> local mapping for identifiers within this // module. - GlobalIdentifierMap.insert(std::make_pair(getTotalNumIdentifiers() + 1, + GlobalIdentifierMap.insert(std::make_pair(getTotalNumIdentifiers() + 1, &F)); - + // Introduce the local -> global mapping for identifiers within this // module. F.IdentifierRemap.insertOrReplace( @@ -2766,6 +2782,14 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { EagerlyDeserializedDecls.push_back(getGlobalDeclID(F, Record[I])); break; + case MODULAR_CODEGEN_DECLS: + // FIXME: Skip reading this record if our ASTConsumer doesn't care about + // them (ie: if we're not codegenerating this module). 
+ if (F.Kind == MK_MainFile) + for (unsigned I = 0, N = Record.size(); I != N; ++I) + EagerlyDeserializedDecls.push_back(getGlobalDeclID(F, Record[I])); + break; + case SPECIAL_TYPES: if (SpecialTypes.empty()) { for (unsigned I = 0, N = Record.size(); I != N; ++I) @@ -2809,11 +2833,11 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { Error("invalid weak identifiers record"); return Failure; } - - // FIXME: Ignore weak undeclared identifiers from non-original PCH + + // FIXME: Ignore weak undeclared identifiers from non-original PCH // files. This isn't the way to do it :) WeakUndeclaredIdentifiers.clear(); - + // Translate the weak, undeclared identifiers into global IDs. for (unsigned I = 0, N = Record.size(); I < N; /* in loop */) { WeakUndeclaredIdentifiers.push_back( @@ -2831,13 +2855,13 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { F.LocalNumSelectors = Record[0]; unsigned LocalBaseSelectorID = Record[1]; F.BaseSelectorID = getTotalNumSelectors(); - + if (F.LocalNumSelectors > 0) { - // Introduce the global -> local mapping for selectors within this + // Introduce the global -> local mapping for selectors within this // module. GlobalSelectorMap.insert(std::make_pair(getTotalNumSelectors()+1, &F)); - - // Introduce the local -> global mapping for selectors within this + + // Introduce the local -> global mapping for selectors within this // module. F.SelectorRemap.insertOrReplace( std::make_pair(LocalBaseSelectorID, @@ -2847,7 +2871,7 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { } break; } - + case METHOD_POOL: F.SelectorLookupTableData = (const unsigned char *)Blob.data(); if (Record[0]) @@ -2862,7 +2886,7 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { case REFERENCED_SELECTOR_POOL: if (!Record.empty()) { for (unsigned Idx = 0, N = Record.size() - 1; Idx < N; /* in loop */) { - ReferencedSelectorsData.push_back(getGlobalSelectorID(F, + ReferencedSelectorsData.push_back(getGlobalSelectorID(F, Record[Idx++])); ReferencedSelectorsData.push_back(ReadSourceLocation(F, Record, Idx). getRawEncoding()); @@ -2874,7 +2898,7 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { if (!Record.empty() && Listener) Listener->ReadCounter(F, Record[0]); break; - + case FILE_SORTED_DECLS: F.FileSortedDecls = (const DeclID *)Blob.data(); F.NumFileSortedDecls = Record[0]; @@ -2911,85 +2935,14 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { // This module. Base was 2 when being compiled. F.SLocRemap.insertOrReplace(std::make_pair(2U, static_cast(F.SLocEntryBaseOffset - 2))); - + TotalNumSLocEntries += F.LocalNumSLocEntries; break; } - case MODULE_OFFSET_MAP: { - // Additional remapping information. - const unsigned char *Data = (const unsigned char*)Blob.data(); - const unsigned char *DataEnd = Data + Blob.size(); - - // If we see this entry before SOURCE_LOCATION_OFFSETS, add placeholders. - if (F.SLocRemap.find(0) == F.SLocRemap.end()) { - F.SLocRemap.insert(std::make_pair(0U, 0)); - F.SLocRemap.insert(std::make_pair(2U, 1)); - } - - // Continuous range maps we may be updating in our module. 
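// [Editor's note -- illustration only, not part of the patch. Every ID space
// in a module file is remapped the same way: a ContinuousRangeMap from the
// first local ID of a range to the delta that turns a local ID into a global
// one, which is why entries are inserted as (Offset, BaseOffset - Offset).
// A minimal sketch of the lookup side, assuming a sorted vector of
// (RangeStart, Delta) pairs:]
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>
#include <vector>

using RemapEntry = std::pair<uint32_t, int>; // (first local ID, delta)

static uint32_t toGlobal(const std::vector<RemapEntry> &Remap,
                         uint32_t LocalID) {
  // The last range starting at or before LocalID supplies the delta.
  auto It = std::upper_bound(
      Remap.begin(), Remap.end(), LocalID,
      [](uint32_t V, const RemapEntry &E) { return V < E.first; });
  assert(It != Remap.begin() && "ID below every known range");
  return LocalID + static_cast<uint32_t>(std::prev(It)->second);
}
// [The rewritten MODULE_OFFSET_MAP case below also changes *when* the maps
// are built: the raw blob is now stashed on the ModuleFile and parsed lazily
// by ReadModuleOffsetMap() the first time an ID from that module needs
// translating.]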
-      typedef ContinuousRangeMap<uint32_t, int, 2>::Builder
-          RemapBuilder;
-      RemapBuilder SLocRemap(F.SLocRemap);
-      RemapBuilder IdentifierRemap(F.IdentifierRemap);
-      RemapBuilder MacroRemap(F.MacroRemap);
-      RemapBuilder PreprocessedEntityRemap(F.PreprocessedEntityRemap);
-      RemapBuilder SubmoduleRemap(F.SubmoduleRemap);
-      RemapBuilder SelectorRemap(F.SelectorRemap);
-      RemapBuilder DeclRemap(F.DeclRemap);
-      RemapBuilder TypeRemap(F.TypeRemap);
-
-      while (Data < DataEnd) {
-        using namespace llvm::support;
-        uint16_t Len = endian::readNext<uint16_t, little, unaligned>(Data);
-        StringRef Name = StringRef((const char*)Data, Len);
-        Data += Len;
-        ModuleFile *OM = ModuleMgr.lookup(Name);
-        if (!OM) {
-          Error("SourceLocation remap refers to unknown module");
-          return Failure;
-        }
-
-        uint32_t SLocOffset =
-            endian::readNext<uint32_t, little, unaligned>(Data);
-        uint32_t IdentifierIDOffset =
-            endian::readNext<uint32_t, little, unaligned>(Data);
-        uint32_t MacroIDOffset =
-            endian::readNext<uint32_t, little, unaligned>(Data);
-        uint32_t PreprocessedEntityIDOffset =
-            endian::readNext<uint32_t, little, unaligned>(Data);
-        uint32_t SubmoduleIDOffset =
-            endian::readNext<uint32_t, little, unaligned>(Data);
-        uint32_t SelectorIDOffset =
-            endian::readNext<uint32_t, little, unaligned>(Data);
-        uint32_t DeclIDOffset =
-            endian::readNext<uint32_t, little, unaligned>(Data);
-        uint32_t TypeIndexOffset =
-            endian::readNext<uint32_t, little, unaligned>(Data);
-
-        uint32_t None = std::numeric_limits<uint32_t>::max();
-
-        auto mapOffset = [&](uint32_t Offset, uint32_t BaseOffset,
-                             RemapBuilder &Remap) {
-          if (Offset != None)
-            Remap.insert(std::make_pair(Offset,
-                                        static_cast<int>(BaseOffset - Offset)));
-        };
-        mapOffset(SLocOffset, OM->SLocEntryBaseOffset, SLocRemap);
-        mapOffset(IdentifierIDOffset, OM->BaseIdentifierID, IdentifierRemap);
-        mapOffset(MacroIDOffset, OM->BaseMacroID, MacroRemap);
-        mapOffset(PreprocessedEntityIDOffset, OM->BasePreprocessedEntityID,
-                  PreprocessedEntityRemap);
-        mapOffset(SubmoduleIDOffset, OM->BaseSubmoduleID, SubmoduleRemap);
-        mapOffset(SelectorIDOffset, OM->BaseSelectorID, SelectorRemap);
-        mapOffset(DeclIDOffset, OM->BaseDeclID, DeclRemap);
-        mapOffset(TypeIndexOffset, OM->BaseTypeIndex, TypeRemap);
-
-        // Global -> local mappings.
-        F.GlobalToLocalDeclIDs[OM] = DeclIDOffset;
-      }
+    case MODULE_OFFSET_MAP:
+      F.ModuleOffsetMap = Blob;
       break;
-    }

     case SOURCE_MANAGER_LINE_TABLE:
       if (ParseLineTable(F, Record))
@@ -3003,7 +2956,7 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
       Error("Multiple SOURCE_LOCATION_PRELOADS records in AST file");
       return Failure;
     }
-    
+
     F.PreloadSLocEntries.swap(Record);
     break;
   }
@@ -3018,12 +2971,12 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
       Error("Invalid VTABLE_USES record");
       return Failure;
     }
-    
+
     // Later tables overwrite earlier ones.
     // FIXME: Modules will have some trouble with this. This is clearly not
    // the right way to do this.
VTableUses.clear(); - + for (unsigned Idx = 0, N = Record.size(); Idx != N; /* In loop */) { VTableUses.push_back(getGlobalDeclID(F, Record[Idx++])); VTableUses.push_back( @@ -3065,13 +3018,13 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { F.NumPreprocessedEntities = Blob.size() / sizeof(PPEntityOffset); unsigned LocalBasePreprocessedEntityID = Record[0]; - + unsigned StartingID; if (!PP.getPreprocessingRecord()) PP.createPreprocessingRecord(); if (!PP.getPreprocessingRecord()->getExternalSource()) PP.getPreprocessingRecord()->SetExternalSource(*this); - StartingID + StartingID = PP.getPreprocessingRecord() ->allocateLoadedEntities(F.NumPreprocessedEntities); F.BasePreprocessedEntityID = StartingID; @@ -3080,7 +3033,7 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { // Introduce the global -> local mapping for preprocessed entities in // this module. GlobalPreprocessedEntityMap.insert(std::make_pair(StartingID, &F)); - + // Introduce the local -> global mapping for preprocessed entities in // this module. F.PreprocessedEntityRemap.insertOrReplace( @@ -3090,7 +3043,7 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { break; } - + case DECL_UPDATE_OFFSETS: { if (Record.size() % 2 != 0) { Error("invalid DECL_UPDATE_OFFSETS block in AST file"); @@ -3113,12 +3066,12 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { Error("duplicate OBJC_CATEGORIES_MAP record in AST file"); return Failure; } - + F.LocalNumObjCCategoriesInMap = Record[0]; F.ObjCCategoriesMap = (const ObjCCategoriesInfo *)Blob.data(); break; } - + case OBJC_CATEGORIES: F.ObjCCategories.swap(Record); break; @@ -3130,7 +3083,7 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { F.PragmaDiagMappings.insert(F.PragmaDiagMappings.end(), Record.begin(), Record.end()); break; - + case CUDA_SPECIAL_DECL_REFS: // Later tables overwrite earlier ones. // FIXME: Modules will have trouble with this. @@ -3147,32 +3100,62 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { = HeaderFileInfoLookupTable::Create( (const unsigned char *)F.HeaderFileInfoTableData + Record[0], (const unsigned char *)F.HeaderFileInfoTableData, - HeaderFileInfoTrait(*this, F, + HeaderFileInfoTrait(*this, F, &PP.getHeaderSearchInfo(), Blob.data() + Record[2])); - + PP.getHeaderSearchInfo().SetExternalSource(this); if (!PP.getHeaderSearchInfo().getExternalLookup()) PP.getHeaderSearchInfo().SetExternalLookup(this); } break; } - + case FP_PRAGMA_OPTIONS: // Later tables overwrite earlier ones. FPPragmaOptions.swap(Record); break; case OPENCL_EXTENSIONS: - // Later tables overwrite earlier ones. 
- OpenCLExtensions.swap(Record); + for (unsigned I = 0, E = Record.size(); I != E; ) { + auto Name = ReadString(Record, I); + auto &Opt = OpenCLExtensions.OptMap[Name]; + Opt.Supported = Record[I++] != 0; + Opt.Enabled = Record[I++] != 0; + Opt.Avail = Record[I++]; + Opt.Core = Record[I++]; + } + break; + + case OPENCL_EXTENSION_TYPES: + for (unsigned I = 0, E = Record.size(); I != E;) { + auto TypeID = static_cast<::TypeID>(Record[I++]); + auto *Type = GetType(TypeID).getTypePtr(); + auto NumExt = static_cast(Record[I++]); + for (unsigned II = 0; II != NumExt; ++II) { + auto Ext = ReadString(Record, I); + OpenCLTypeExtMap[Type].insert(Ext); + } + } + break; + + case OPENCL_EXTENSION_DECLS: + for (unsigned I = 0, E = Record.size(); I != E;) { + auto DeclID = static_cast<::DeclID>(Record[I++]); + auto *Decl = GetDecl(DeclID); + auto NumExt = static_cast(Record[I++]); + for (unsigned II = 0; II != NumExt; ++II) { + auto Ext = ReadString(Record, I); + OpenCLDeclExtMap[Decl].insert(Ext); + } + } break; case TENTATIVE_DEFINITIONS: for (unsigned I = 0, N = Record.size(); I != N; ++I) TentativeDefinitions.push_back(getGlobalDeclID(F, Record[I])); break; - + case KNOWN_NAMESPACES: for (unsigned I = 0, N = Record.size(); I != N; ++I) KnownNamespaces.push_back(getGlobalDeclID(F, Record[I])); @@ -3215,8 +3198,11 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { for (unsigned I = 0, N = Record.size(); I != N; /**/) { unsigned GlobalID = getGlobalSubmoduleID(F, Record[I++]); SourceLocation Loc = ReadSourceLocation(F, Record, I); - if (GlobalID) + if (GlobalID) { ImportedModules.push_back(ImportedSubmodule(GlobalID, Loc)); + if (DeserializationListener) + DeserializationListener->ModuleImportRead(GlobalID, Loc); + } } } break; @@ -3293,6 +3279,87 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { } } +void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const { + assert(!F.ModuleOffsetMap.empty() && "no module offset map to read"); + + // Additional remapping information. + const unsigned char *Data = (const unsigned char*)F.ModuleOffsetMap.data(); + const unsigned char *DataEnd = Data + F.ModuleOffsetMap.size(); + F.ModuleOffsetMap = StringRef(); + + // If we see this entry before SOURCE_LOCATION_OFFSETS, add placeholders. + if (F.SLocRemap.find(0) == F.SLocRemap.end()) { + F.SLocRemap.insert(std::make_pair(0U, 0)); + F.SLocRemap.insert(std::make_pair(2U, 1)); + } + + // Continuous range maps we may be updating in our module. + typedef ContinuousRangeMap::Builder + RemapBuilder; + RemapBuilder SLocRemap(F.SLocRemap); + RemapBuilder IdentifierRemap(F.IdentifierRemap); + RemapBuilder MacroRemap(F.MacroRemap); + RemapBuilder PreprocessedEntityRemap(F.PreprocessedEntityRemap); + RemapBuilder SubmoduleRemap(F.SubmoduleRemap); + RemapBuilder SelectorRemap(F.SelectorRemap); + RemapBuilder DeclRemap(F.DeclRemap); + RemapBuilder TypeRemap(F.TypeRemap); + + while (Data < DataEnd) { + // FIXME: Looking up dependency modules by filename is horrible. 
+    using namespace llvm::support;
+    uint16_t Len = endian::readNext<uint16_t, little, unaligned>(Data);
+    StringRef Name = StringRef((const char*)Data, Len);
+    Data += Len;
+    ModuleFile *OM = ModuleMgr.lookup(Name);
+    if (!OM) {
+      std::string Msg =
+          "SourceLocation remap refers to unknown module, cannot find ";
+      Msg.append(Name);
+      Error(Msg);
+      return;
+    }
+
+    uint32_t SLocOffset =
+        endian::readNext<uint32_t, little, unaligned>(Data);
+    uint32_t IdentifierIDOffset =
+        endian::readNext<uint32_t, little, unaligned>(Data);
+    uint32_t MacroIDOffset =
+        endian::readNext<uint32_t, little, unaligned>(Data);
+    uint32_t PreprocessedEntityIDOffset =
+        endian::readNext<uint32_t, little, unaligned>(Data);
+    uint32_t SubmoduleIDOffset =
+        endian::readNext<uint32_t, little, unaligned>(Data);
+    uint32_t SelectorIDOffset =
+        endian::readNext<uint32_t, little, unaligned>(Data);
+    uint32_t DeclIDOffset =
+        endian::readNext<uint32_t, little, unaligned>(Data);
+    uint32_t TypeIndexOffset =
+        endian::readNext<uint32_t, little, unaligned>(Data);
+
+    uint32_t None = std::numeric_limits<uint32_t>::max();
+
+    auto mapOffset = [&](uint32_t Offset, uint32_t BaseOffset,
+                         RemapBuilder &Remap) {
+      if (Offset != None)
+        Remap.insert(std::make_pair(Offset,
+                                    static_cast<int>(BaseOffset - Offset)));
+    };
+    mapOffset(SLocOffset, OM->SLocEntryBaseOffset, SLocRemap);
+    mapOffset(IdentifierIDOffset, OM->BaseIdentifierID, IdentifierRemap);
+    mapOffset(MacroIDOffset, OM->BaseMacroID, MacroRemap);
+    mapOffset(PreprocessedEntityIDOffset, OM->BasePreprocessedEntityID,
+              PreprocessedEntityRemap);
+    mapOffset(SubmoduleIDOffset, OM->BaseSubmoduleID, SubmoduleRemap);
+    mapOffset(SelectorIDOffset, OM->BaseSelectorID, SelectorRemap);
+    mapOffset(DeclIDOffset, OM->BaseDeclID, DeclRemap);
+    mapOffset(TypeIndexOffset, OM->BaseTypeIndex, TypeRemap);
+
+    // Global -> local mappings.
+    F.GlobalToLocalDeclIDs[OM] = DeclIDOffset;
+  }
+}
+
 ASTReader::ASTReadResult
 ASTReader::ReadModuleMapFileBlock(RecordData &Record, ModuleFile &F,
                                   const ModuleFile *ImportedBy,
@@ -3312,8 +3379,7 @@ ASTReader::ReadModuleMapFileBlock(RecordData &Record, ModuleFile &F,
   // usable header search context.
   assert(!F.ModuleName.empty() &&
          "MODULE_NAME should come before MODULE_MAP_FILE");
-  if (F.Kind == MK_ImplicitModule &&
-      (*ModuleMgr.begin())->Kind != MK_MainFile) {
+  if (F.Kind == MK_ImplicitModule && ModuleMgr.begin()->Kind != MK_MainFile) {
     // An implicitly-loaded module file should have its module listed in some
     // module map file that we've already loaded.
     Module *M = PP.getHeaderSearchInfo().lookupModule(F.ModuleName);
@@ -3515,7 +3581,7 @@ bool ASTReader::loadGlobalIndex() {
   if (TriedLoadingGlobalIndex || !UseGlobalIndex ||
       !Context.getLangOpts().Modules)
     return true;
-  
+
   // Try to load the global index.
   TriedLoadingGlobalIndex = true;
   StringRef ModuleCachePath
@@ -3605,11 +3671,10 @@ ASTReader::ASTReadResult ASTReader::ReadAST(StringRef FileName,
     for (const ImportedModule &IM : Loaded)
       LoadedSet.insert(IM.Mod);

-    ModuleMgr.removeModules(ModuleMgr.begin() + NumModules, ModuleMgr.end(),
-                            LoadedSet,
+    ModuleMgr.removeModules(ModuleMgr.begin() + NumModules, LoadedSet,
                             Context.getLangOpts().Modules
-                              ? &PP.getHeaderSearchInfo().getModuleMap()
-                              : nullptr);
+                                ? &PP.getHeaderSearchInfo().getModuleMap()
+                                : nullptr);

     // If we find that any modules are unusable, the global index is going
     // to be out-of-date. Just remove it.
@@ -3639,12 +3704,12 @@ ASTReader::ASTReadResult ASTReader::ReadAST(StringRef FileName,
       return Result;
     }

-    // Once read, set the ModuleFile bit base offset and update the size in 
+    // Once read, set the ModuleFile bit base offset and update the size in
    // bits of all files we've seen.
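// [Editor's note -- illustration only, not part of the patch. Each module
// file gets a window in a single global bit-offset space: GlobalBitOffset is
// the running total of the bit sizes of all files read so far, and
// GlobalBitOffsetsMap decodes a global offset back into (module, local
// offset), which is what getLocalBitOffset() does for GetExternalDeclStmt()
// further down. A sketch with a plain std::map and a hypothetical
// ModuleWindow type:]
#include <cassert>
#include <cstdint>
#include <map>
#include <utility>

struct ModuleWindow { uint64_t SizeInBits; };

// Keyed by the first global bit offset of each module's window.
using WindowMap = std::map<uint64_t, ModuleWindow *>;

static std::pair<ModuleWindow *, uint64_t>
getLocalBitOffsetSketch(const WindowMap &Windows, uint64_t GlobalOffset) {
  auto It = Windows.upper_bound(GlobalOffset); // first window *after* it
  assert(It != Windows.begin() && "offset precedes the first module");
  --It;                                        // owning window
  assert(GlobalOffset - It->first < It->second->SizeInBits);
  return {It->second, GlobalOffset - It->first};
}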
F.GlobalBitOffset = TotalModulesSizeInBits; TotalModulesSizeInBits += F.SizeInBits; GlobalBitOffsetsMap.insert(std::make_pair(F.GlobalBitOffset, &F)); - + // Preload SLocEntries. for (unsigned I = 0, N = F.PreloadSLocEntries.size(); I != N; ++I) { int Index = int(F.PreloadSLocEntries[I] - 1) + F.SLocEntryBaseID; @@ -3714,7 +3779,7 @@ ASTReader::ASTReadResult ASTReader::ReadAST(StringRef FileName, // Mark selectors as out of date. for (auto Sel : SelectorGeneration) SelectorOutOfDate[Sel.first] = true; - + // Resolve any unresolved module exports. for (unsigned I = 0, N = UnresolvedModuleRefs.size(); I != N; ++I) { UnresolvedModuleRef &Unresolved = UnresolvedModuleRefs[I]; @@ -3752,7 +3817,7 @@ ASTReader::ASTReadResult ASTReader::ReadAST(StringRef FileName, // FIXME: How do we load the 'use'd modules? They may not be submodules. // Might be unnecessary as use declarations are only used to build the // module itself. - + InitializeContext(); if (SemaObj) @@ -3763,7 +3828,7 @@ ASTReader::ASTReadResult ASTReader::ReadAST(StringRef FileName, ModuleFile &PrimaryModule = ModuleMgr.getPrimaryModule(); if (PrimaryModule.OriginalSourceFileID.isValid()) { - PrimaryModule.OriginalSourceFileID + PrimaryModule.OriginalSourceFileID = FileID::get(PrimaryModule.SLocEntryBaseID + PrimaryModule.OriginalSourceFileID.getOpaqueValue() - 1); @@ -3776,11 +3841,11 @@ ASTReader::ASTReadResult ASTReader::ReadAST(StringRef FileName, SourceMgr.setMainFileID(PrimaryModule.OriginalSourceFileID); } } - + // For any Objective-C class definitions we have already loaded, make sure // that we load any additional categories. for (unsigned I = 0, N = ObjCClassesLoaded.size(); I != N; ++I) { - loadObjCCategories(ObjCClassesLoaded[I]->getGlobalID(), + loadObjCCategories(ObjCClassesLoaded[I]->getGlobalID(), ObjCClassesLoaded[I], PreviousGeneration); } @@ -3892,7 +3957,7 @@ ASTReader::ReadASTCore(StringRef FileName, BitstreamCursor &Stream = F.Stream; Stream = BitstreamCursor(PCHContainerRdr.ExtractPCH(*F.Buffer)); F.SizeInBits = F.Buffer->getBufferSize() * 8; - + // Sniff for the signature. if (!startsWithASTFileMagic(Stream)) { Diag(diag::err_module_file_invalid) << moduleKindForDiagnostic(Type) @@ -3904,14 +3969,14 @@ ASTReader::ReadASTCore(StringRef FileName, bool HaveReadControlBlock = false; while (true) { llvm::BitstreamEntry Entry = Stream.advance(); - + switch (Entry.Kind) { case llvm::BitstreamEntry::Error: case llvm::BitstreamEntry::Record: case llvm::BitstreamEntry::EndBlock: Error("invalid record at top-level of AST file"); return Failure; - + case llvm::BitstreamEntry::SubBlock: break; } @@ -4042,26 +4107,26 @@ ASTReader::ASTReadResult ASTReader::ReadExtensionBlock(ModuleFile &F) { void ASTReader::InitializeContext() { // If there's a listener, notify them that we "read" the translation unit. if (DeserializationListener) - DeserializationListener->DeclRead(PREDEF_DECL_TRANSLATION_UNIT_ID, + DeserializationListener->DeclRead(PREDEF_DECL_TRANSLATION_UNIT_ID, Context.getTranslationUnitDecl()); // FIXME: Find a better way to deal with collisions between these // built-in types. Right now, we just ignore the problem. - + // Load the special types. 
if (SpecialTypes.size() >= NumSpecialTypeIDs) { if (unsigned String = SpecialTypes[SPECIAL_TYPE_CF_CONSTANT_STRING]) { if (!Context.CFConstantStringTypeDecl) Context.setCFConstantStringType(GetType(String)); } - + if (unsigned File = SpecialTypes[SPECIAL_TYPE_FILE]) { QualType FileType = GetType(File); if (FileType.isNull()) { Error("FILE type is NULL"); return; } - + if (!Context.FILEDecl) { if (const TypedefType *Typedef = FileType->getAs()) Context.setFILEDecl(Typedef->getDecl()); @@ -4075,14 +4140,14 @@ void ASTReader::InitializeContext() { } } } - + if (unsigned Jmp_buf = SpecialTypes[SPECIAL_TYPE_JMP_BUF]) { QualType Jmp_bufType = GetType(Jmp_buf); if (Jmp_bufType.isNull()) { Error("jmp_buf type is NULL"); return; } - + if (!Context.jmp_bufDecl) { if (const TypedefType *Typedef = Jmp_bufType->getAs()) Context.setjmp_bufDecl(Typedef->getDecl()); @@ -4096,14 +4161,14 @@ void ASTReader::InitializeContext() { } } } - + if (unsigned Sigjmp_buf = SpecialTypes[SPECIAL_TYPE_SIGJMP_BUF]) { QualType Sigjmp_bufType = GetType(Sigjmp_buf); if (Sigjmp_bufType.isNull()) { Error("sigjmp_buf type is NULL"); return; } - + if (!Context.sigjmp_bufDecl) { if (const TypedefType *Typedef = Sigjmp_bufType->getAs()) Context.setsigjmp_bufDecl(Typedef->getDecl()); @@ -4151,7 +4216,7 @@ void ASTReader::InitializeContext() { } } } - + ReadPragmaDiagnosticMappings(Context.getDiagnostics()); // If there were any CUDA special declarations, deserialize them. @@ -4226,7 +4291,7 @@ std::string ASTReader::getOriginalSourceFile( Diags.Report(diag::err_fe_not_a_pch_file) << ASTFileName; return std::string(); } - + // Scan for the CONTROL_BLOCK_ID block. if (SkipCursorToBlock(Stream, CONTROL_BLOCK_ID)) { Diags.Report(diag::err_fe_pch_malformed_block) << ASTFileName; @@ -4239,12 +4304,12 @@ std::string ASTReader::getOriginalSourceFile( llvm::BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); if (Entry.Kind == llvm::BitstreamEntry::EndBlock) return std::string(); - + if (Entry.Kind != llvm::BitstreamEntry::Record) { Diags.Report(diag::err_fe_pch_malformed_block) << ASTFileName; return std::string(); } - + Record.clear(); StringRef Blob; if (Stream.readRecord(Entry.ID, Record, &Blob) == ORIGINAL_FILE) @@ -4393,7 +4458,7 @@ bool ASTReader::readASTFileControlBlock( if (Listener.ReadFullVersionInformation(Blob)) return true; - + break; } case MODULE_NAME: @@ -4539,7 +4604,7 @@ ASTReader::ReadSubmoduleBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { RecordData Record; while (true) { llvm::BitstreamEntry Entry = F.Stream.advanceSkippingSubblocks(); - + switch (Entry.Kind) { case llvm::BitstreamEntry::SubBlock: // Handled for us already. case llvm::BitstreamEntry::Error: @@ -4591,6 +4656,7 @@ ASTReader::ReadSubmoduleBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { bool InferExplicitSubmodules = Record[Idx++]; bool InferExportWildcard = Record[Idx++]; bool ConfigMacrosExhaustive = Record[Idx++]; + bool WithCodegen = Record[Idx++]; Module *ParentModule = nullptr; if (Parent) @@ -4598,8 +4664,9 @@ ASTReader::ReadSubmoduleBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { // Retrieve this (sub)module from the module map, creating it if // necessary. 
- CurrentModule = ModMap.findOrCreateModule(Name, ParentModule, IsFramework, - IsExplicit).first; + CurrentModule = + ModMap.findOrCreateModule(Name, ParentModule, IsFramework, IsExplicit) + .first; // FIXME: set the definition loc for CurrentModule, or call // ModMap.setInferredModuleAllowedBy() @@ -4635,9 +4702,10 @@ ASTReader::ReadSubmoduleBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { CurrentModule->InferExplicitSubmodules = InferExplicitSubmodules; CurrentModule->InferExportWildcard = InferExportWildcard; CurrentModule->ConfigMacrosExhaustive = ConfigMacrosExhaustive; + CurrentModule->WithCodegen = WithCodegen; if (DeserializationListener) DeserializationListener->ModuleRead(GlobalID, CurrentModule); - + SubmodulesLoaded[GlobalIndex] = CurrentModule; // Clear out data that will be replaced by what is in the module file. @@ -4677,7 +4745,7 @@ ASTReader::ReadSubmoduleBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { } break; } - + case SUBMODULE_HEADER: case SUBMODULE_EXCLUDED_HEADER: case SUBMODULE_PRIVATE_HEADER: @@ -4711,17 +4779,17 @@ ASTReader::ReadSubmoduleBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { } break; } - + case SUBMODULE_METADATA: { F.BaseSubmoduleID = getTotalNumSubmodules(); F.LocalNumSubmodules = Record[0]; unsigned LocalBaseSubmoduleID = Record[1]; if (F.LocalNumSubmodules > 0) { - // Introduce the global -> local mapping for submodules within this + // Introduce the global -> local mapping for submodules within this // module. GlobalSubmoduleMap.insert(std::make_pair(getTotalNumSubmodules()+1,&F)); - - // Introduce the local -> global mapping for submodules within this + + // Introduce the local -> global mapping for submodules within this // module. F.SubmoduleRemap.insertOrReplace( std::make_pair(LocalBaseSubmoduleID, @@ -4731,7 +4799,7 @@ ASTReader::ReadSubmoduleBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { } break; } - + case SUBMODULE_IMPORTS: { for (unsigned Idx = 0; Idx != Record.size(); ++Idx) { UnresolvedModuleRef Unresolved; @@ -4755,8 +4823,8 @@ ASTReader::ReadSubmoduleBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { Unresolved.IsWildcard = Record[Idx + 1]; UnresolvedModuleRefs.push_back(Unresolved); } - - // Once we've loaded the set of exports, there's no reason to keep + + // Once we've loaded the set of exports, there's no reason to keep // the parsed, unresolved exports around. 
CurrentModule->UnresolvedExports.clear(); break; @@ -4969,7 +5037,7 @@ std::pair ASTReader::getModulePreprocessedEntity(unsigned GlobalIndex) { GlobalPreprocessedEntityMapType::iterator I = GlobalPreprocessedEntityMap.find(GlobalIndex); - assert(I != GlobalPreprocessedEntityMap.end() && + assert(I != GlobalPreprocessedEntityMap.end() && "Corrupted global preprocessed entity map"); ModuleFile *M = I->second; unsigned LocalIndex = GlobalIndex - M->BasePreprocessedEntityID; @@ -5005,8 +5073,8 @@ PreprocessedEntity *ASTReader::ReadPreprocessedEntity(unsigned Index) { Error("no preprocessing record"); return nullptr; } - - SavedStreamPosition SavedPosition(M.PreprocessorDetailCursor); + + SavedStreamPosition SavedPosition(M.PreprocessorDetailCursor); M.PreprocessorDetailCursor.JumpToBit(PPOffs.BitOffset); llvm::BitstreamEntry Entry = @@ -5045,7 +5113,7 @@ PreprocessedEntity *ASTReader::ReadPreprocessedEntity(unsigned Index) { return ME; } - + case PPD_MACRO_DEFINITION: { // Decode the identifier info and then check again; if the macro is // still defined and associated with the identifier, @@ -5057,14 +5125,14 @@ PreprocessedEntity *ASTReader::ReadPreprocessedEntity(unsigned Index) { return MD; } - + case PPD_INCLUSION_DIRECTIVE: { const char *FullFileNameStart = Blob.data() + Record[0]; StringRef FullFileName(FullFileNameStart, Blob.size() - Record[0]); const FileEntry *File = nullptr; if (!FullFileName.empty()) File = PP.getFileManager().getFile(FullFileName); - + // FIXME: Stable encoding InclusionDirective::InclusionKind Kind = static_cast(Record[2]); @@ -5205,11 +5273,11 @@ Optional ASTReader::isPreprocessedEntityInFileID(unsigned Index, ModuleFile &M = *PPInfo.first; unsigned LocalIndex = PPInfo.second; const PPEntityOffset &PPOffs = M.PreprocessedEntityOffsets[LocalIndex]; - + SourceLocation Loc = TranslateSourceLocation(M, PPOffs.getBegin()); if (Loc.isInvalid()) return false; - + if (SourceMgr.isInFileID(SourceMgr.getFileLoc(Loc), FID)) return true; else @@ -5221,9 +5289,9 @@ namespace { /// \brief Visitor used to search for information about a header file. class HeaderFileInfoVisitor { const FileEntry *FE; - + Optional HFI; - + public: explicit HeaderFileInfoVisitor(const FileEntry *FE) : FE(FE) { } @@ -5242,7 +5310,7 @@ namespace { HFI = *Pos; return true; } - + Optional getHeaderFileInfo() const { return HFI; } }; @@ -5253,53 +5321,88 @@ HeaderFileInfo ASTReader::GetHeaderFileInfo(const FileEntry *FE) { ModuleMgr.visit(Visitor); if (Optional HFI = Visitor.getHeaderFileInfo()) return *HFI; - + return HeaderFileInfo(); } void ASTReader::ReadPragmaDiagnosticMappings(DiagnosticsEngine &Diag) { - // FIXME: Make it work properly with modules. - SmallVector DiagStates; - for (ModuleIterator I = ModuleMgr.begin(), E = ModuleMgr.end(); I != E; ++I) { - ModuleFile &F = *(*I); + using DiagState = DiagnosticsEngine::DiagState; + SmallVector DiagStates; + + for (ModuleFile &F : ModuleMgr) { unsigned Idx = 0; + auto &Record = F.PragmaDiagMappings; + if (Record.empty()) + continue; + DiagStates.clear(); - assert(!Diag.DiagStates.empty()); - DiagStates.push_back(&Diag.DiagStates.front()); // the command-line one. 
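// [Editor's note -- illustration only, not part of the patch. The rewritten
// reader below uses a back-reference scheme: each serialized state begins
// with an ID, where 0 means "a new state follows as (diag ID, severity)
// pairs terminated by -1" and any other value N reuses the Nth previously
// decoded state. A stripped-down decoder with simplified types (a real
// DiagState carries more than a severity map):]
#include <cstdint>
#include <deque>
#include <map>
#include <utility>
#include <vector>

using DiagStateSketch = std::map<unsigned, unsigned>; // diag ID -> severity

static const DiagStateSketch *
readDiagState(const std::vector<uint64_t> &Record, unsigned &Idx,
              std::deque<DiagStateSketch> &Seen, // deque: stable references
              const DiagStateSketch &BasedOn) {
  uint64_t BackrefID = Record[Idx++];
  if (BackrefID != 0)
    return &Seen[BackrefID - 1]; // back-references are 1-based
  DiagStateSketch NewState = BasedOn; // new state inherits, then overrides
  while (Idx + 1 < Record.size() && Record[Idx] != uint64_t(-1)) {
    unsigned DiagID = static_cast<unsigned>(Record[Idx++]);
    NewState[DiagID] = static_cast<unsigned>(Record[Idx++]);
  }
  ++Idx; // skip the -1 terminator
  Seen.push_back(std::move(NewState));
  return &Seen.back();
}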
- while (Idx < F.PragmaDiagMappings.size()) { - SourceLocation Loc = ReadSourceLocation(F, F.PragmaDiagMappings[Idx++]); - unsigned DiagStateID = F.PragmaDiagMappings[Idx++]; - if (DiagStateID != 0) { - Diag.DiagStatePoints.push_back( - DiagnosticsEngine::DiagStatePoint(DiagStates[DiagStateID-1], - FullSourceLoc(Loc, SourceMgr))); - continue; - } - - assert(DiagStateID == 0); + + auto ReadDiagState = + [&](const DiagState &BasedOn, SourceLocation Loc, + bool IncludeNonPragmaStates) -> DiagnosticsEngine::DiagState * { + unsigned BackrefID = Record[Idx++]; + if (BackrefID != 0) + return DiagStates[BackrefID - 1]; + // A new DiagState was created here. - Diag.DiagStates.push_back(*Diag.GetCurDiagState()); - DiagnosticsEngine::DiagState *NewState = &Diag.DiagStates.back(); + Diag.DiagStates.push_back(BasedOn); + DiagState *NewState = &Diag.DiagStates.back(); DiagStates.push_back(NewState); - Diag.DiagStatePoints.push_back( - DiagnosticsEngine::DiagStatePoint(NewState, - FullSourceLoc(Loc, SourceMgr))); - while (true) { - assert(Idx < F.PragmaDiagMappings.size() && - "Invalid data, didn't find '-1' marking end of diag/map pairs"); - if (Idx >= F.PragmaDiagMappings.size()) { - break; // Something is messed up but at least avoid infinite loop in - // release build. - } - unsigned DiagID = F.PragmaDiagMappings[Idx++]; - if (DiagID == (unsigned)-1) { - break; // no more diag/map pairs for this location. - } - diag::Severity Map = (diag::Severity)F.PragmaDiagMappings[Idx++]; + while (Idx + 1 < Record.size() && Record[Idx] != unsigned(-1)) { + unsigned DiagID = Record[Idx++]; + diag::Severity Map = (diag::Severity)Record[Idx++]; DiagnosticMapping Mapping = Diag.makeUserMapping(Map, Loc); - Diag.GetCurDiagState()->setMapping(DiagID, Mapping); + if (Mapping.isPragma() || IncludeNonPragmaStates) + NewState->setMapping(DiagID, Mapping); + } + assert(Idx != Record.size() && Record[Idx] == unsigned(-1) && + "Invalid data, didn't find '-1' marking end of diag/map pairs"); + ++Idx; + return NewState; + }; + + auto *FirstState = ReadDiagState( + F.isModule() ? DiagState() : *Diag.DiagStatesByLoc.CurDiagState, + SourceLocation(), F.isModule()); + SourceLocation CurStateLoc = + ReadSourceLocation(F, F.PragmaDiagMappings[Idx++]); + auto *CurState = ReadDiagState(*FirstState, CurStateLoc, false); + + if (!F.isModule()) { + Diag.DiagStatesByLoc.CurDiagState = CurState; + Diag.DiagStatesByLoc.CurDiagStateLoc = CurStateLoc; + + // Preserve the property that the imaginary root file describes the + // current state. + auto &T = Diag.DiagStatesByLoc.Files[FileID()].StateTransitions; + if (T.empty()) + T.push_back({CurState, 0}); + else + T[0].State = CurState; + } + + while (Idx < Record.size()) { + SourceLocation Loc = ReadSourceLocation(F, Record[Idx++]); + auto IDAndOffset = SourceMgr.getDecomposedLoc(Loc); + assert(IDAndOffset.second == 0 && "not a start location for a FileID"); + unsigned Transitions = Record[Idx++]; + + // Note that we don't need to set up Parent/ParentOffset here, because + // we won't be changing the diagnostic state within imported FileIDs + // (other than perhaps appending to the main source file, which has no + // parent). 
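// [Editor's note -- illustration only, not part of the patch. For each
// FileID the new representation keeps StateTransitions as (State, Offset)
// pairs sorted by offset, so "which state is active at this offset?" becomes
// one binary search, roughly:]
#include <algorithm>
#include <iterator>
#include <vector>

struct TransitionSketch { unsigned StateIndex; unsigned Offset; };

static const TransitionSketch *
stateAtOffset(const std::vector<TransitionSketch> &T, unsigned Offset) {
  // The last transition at or before Offset is the active one.
  auto It = std::upper_bound(
      T.begin(), T.end(), Offset,
      [](unsigned O, const TransitionSketch &Tr) { return O < Tr.Offset; });
  return It == T.begin() ? nullptr : &*std::prev(It);
}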
+ auto &F = Diag.DiagStatesByLoc.Files[IDAndOffset.first]; + F.StateTransitions.reserve(F.StateTransitions.size() + Transitions); + for (unsigned I = 0; I != Transitions; ++I) { + unsigned Offset = Record[Idx++]; + auto *State = + ReadDiagState(*FirstState, Loc.getLocWithOffset(Offset), false); + F.StateTransitions.push_back({State, Offset}); } } + + // Don't try to read these mappings again. + Record.clear(); } } @@ -5421,7 +5524,7 @@ QualType ASTReader::readTypeRecord(unsigned Index) { QualType ClassType = readType(*Loc.F, Record, Idx); if (PointeeType.isNull() || ClassType.isNull()) return QualType(); - + return Context.getMemberPointerType(PointeeType, ClassType.getTypePtr()); } @@ -5531,7 +5634,7 @@ QualType ASTReader::readTypeRecord(unsigned Index) { return Context.getTypeDeclType( ReadDeclAs(*Loc.F, Record, Idx)); } - + case TYPE_TYPEDEF: { if (Record.size() != 2) { Error("incorrect encoding of typedef type"); @@ -5576,6 +5679,14 @@ QualType ASTReader::readTypeRecord(unsigned Index) { return Context.getAutoType(Deduced, Keyword, IsDependent); } + case TYPE_DEDUCED_TEMPLATE_SPECIALIZATION: { + TemplateName Name = ReadTemplateName(*Loc.F, Record, Idx); + QualType Deduced = readType(*Loc.F, Record, Idx); + bool IsDependent = Deduced.isNull() ? Record[Idx++] : false; + return Context.getDeducedTemplateSpecializationType(Name, Deduced, + IsDependent); + } + case TYPE_RECORD: { if (Record.size() != 2) { Error("incorrect encoding of record type"); @@ -5836,26 +5947,27 @@ void ASTReader::readExceptionSpec(ModuleFile &ModuleFile, } class clang::TypeLocReader : public TypeLocVisitor { - ASTReader &Reader; - ModuleFile &F; + ModuleFile *F; + ASTReader *Reader; const ASTReader::RecordData &Record; unsigned &Idx; - SourceLocation ReadSourceLocation(const ASTReader::RecordData &R, - unsigned &I) { - return Reader.ReadSourceLocation(F, R, I); + SourceLocation ReadSourceLocation() { + return Reader->ReadSourceLocation(*F, Record, Idx); + } + + TypeSourceInfo *GetTypeSourceInfo() { + return Reader->GetTypeSourceInfo(*F, Record, Idx); } - template - T *ReadDeclAs(const ASTReader::RecordData &Record, unsigned &Idx) { - return Reader.ReadDeclAs(F, Record, Idx); + NestedNameSpecifierLoc ReadNestedNameSpecifierLoc() { + return Reader->ReadNestedNameSpecifierLoc(*F, Record, Idx); } - + public: - TypeLocReader(ASTReader &Reader, ModuleFile &F, + TypeLocReader(ModuleFile &F, ASTReader &Reader, const ASTReader::RecordData &Record, unsigned &Idx) - : Reader(Reader), F(F), Record(Record), Idx(Idx) - { } + : F(&F), Reader(&Reader), Record(Record), Idx(Idx) {} // We want compile-time assurance that we've enumerated all of // these, so unfortunately we have to declare them first, then @@ -5874,7 +5986,7 @@ void TypeLocReader::VisitQualifiedTypeLoc(QualifiedTypeLoc TL) { } void TypeLocReader::VisitBuiltinTypeLoc(BuiltinTypeLoc TL) { - TL.setBuiltinLoc(ReadSourceLocation(Record, Idx)); + TL.setBuiltinLoc(ReadSourceLocation()); if (TL.needsExtraLocalData()) { TL.setWrittenTypeSpec(static_cast(Record[Idx++])); TL.setWrittenSignSpec(static_cast(Record[Idx++])); @@ -5884,11 +5996,11 @@ void TypeLocReader::VisitBuiltinTypeLoc(BuiltinTypeLoc TL) { } void TypeLocReader::VisitComplexTypeLoc(ComplexTypeLoc TL) { - TL.setNameLoc(ReadSourceLocation(Record, Idx)); + TL.setNameLoc(ReadSourceLocation()); } void TypeLocReader::VisitPointerTypeLoc(PointerTypeLoc TL) { - TL.setStarLoc(ReadSourceLocation(Record, Idx)); + TL.setStarLoc(ReadSourceLocation()); } void TypeLocReader::VisitDecayedTypeLoc(DecayedTypeLoc TL) { @@ -5900,27 
+6012,27 @@ void TypeLocReader::VisitAdjustedTypeLoc(AdjustedTypeLoc TL) { } void TypeLocReader::VisitBlockPointerTypeLoc(BlockPointerTypeLoc TL) { - TL.setCaretLoc(ReadSourceLocation(Record, Idx)); + TL.setCaretLoc(ReadSourceLocation()); } void TypeLocReader::VisitLValueReferenceTypeLoc(LValueReferenceTypeLoc TL) { - TL.setAmpLoc(ReadSourceLocation(Record, Idx)); + TL.setAmpLoc(ReadSourceLocation()); } void TypeLocReader::VisitRValueReferenceTypeLoc(RValueReferenceTypeLoc TL) { - TL.setAmpAmpLoc(ReadSourceLocation(Record, Idx)); + TL.setAmpAmpLoc(ReadSourceLocation()); } void TypeLocReader::VisitMemberPointerTypeLoc(MemberPointerTypeLoc TL) { - TL.setStarLoc(ReadSourceLocation(Record, Idx)); - TL.setClassTInfo(Reader.GetTypeSourceInfo(F, Record, Idx)); + TL.setStarLoc(ReadSourceLocation()); + TL.setClassTInfo(GetTypeSourceInfo()); } void TypeLocReader::VisitArrayTypeLoc(ArrayTypeLoc TL) { - TL.setLBracketLoc(ReadSourceLocation(Record, Idx)); - TL.setRBracketLoc(ReadSourceLocation(Record, Idx)); + TL.setLBracketLoc(ReadSourceLocation()); + TL.setRBracketLoc(ReadSourceLocation()); if (Record[Idx++]) - TL.setSizeExpr(Reader.ReadExpr(F)); + TL.setSizeExpr(Reader->ReadExpr(*F)); else TL.setSizeExpr(nullptr); } @@ -5944,24 +6056,26 @@ void TypeLocReader::VisitDependentSizedArrayTypeLoc( void TypeLocReader::VisitDependentSizedExtVectorTypeLoc( DependentSizedExtVectorTypeLoc TL) { - TL.setNameLoc(ReadSourceLocation(Record, Idx)); + TL.setNameLoc(ReadSourceLocation()); } void TypeLocReader::VisitVectorTypeLoc(VectorTypeLoc TL) { - TL.setNameLoc(ReadSourceLocation(Record, Idx)); + TL.setNameLoc(ReadSourceLocation()); } void TypeLocReader::VisitExtVectorTypeLoc(ExtVectorTypeLoc TL) { - TL.setNameLoc(ReadSourceLocation(Record, Idx)); + TL.setNameLoc(ReadSourceLocation()); } void TypeLocReader::VisitFunctionTypeLoc(FunctionTypeLoc TL) { - TL.setLocalRangeBegin(ReadSourceLocation(Record, Idx)); - TL.setLParenLoc(ReadSourceLocation(Record, Idx)); - TL.setRParenLoc(ReadSourceLocation(Record, Idx)); - TL.setLocalRangeEnd(ReadSourceLocation(Record, Idx)); + TL.setLocalRangeBegin(ReadSourceLocation()); + TL.setLParenLoc(ReadSourceLocation()); + TL.setRParenLoc(ReadSourceLocation()); + TL.setExceptionSpecRange(SourceRange(Reader->ReadSourceLocation(*F, Record, Idx), + Reader->ReadSourceLocation(*F, Record, Idx))); + TL.setLocalRangeEnd(ReadSourceLocation()); for (unsigned i = 0, e = TL.getNumParams(); i != e; ++i) { - TL.setParam(i, ReadDeclAs(Record, Idx)); + TL.setParam(i, Reader->ReadDeclAs(*F, Record, Idx)); } } @@ -5973,173 +6087,178 @@ void TypeLocReader::VisitFunctionNoProtoTypeLoc(FunctionNoProtoTypeLoc TL) { VisitFunctionTypeLoc(TL); } void TypeLocReader::VisitUnresolvedUsingTypeLoc(UnresolvedUsingTypeLoc TL) { - TL.setNameLoc(ReadSourceLocation(Record, Idx)); + TL.setNameLoc(ReadSourceLocation()); } void TypeLocReader::VisitTypedefTypeLoc(TypedefTypeLoc TL) { - TL.setNameLoc(ReadSourceLocation(Record, Idx)); + TL.setNameLoc(ReadSourceLocation()); } void TypeLocReader::VisitTypeOfExprTypeLoc(TypeOfExprTypeLoc TL) { - TL.setTypeofLoc(ReadSourceLocation(Record, Idx)); - TL.setLParenLoc(ReadSourceLocation(Record, Idx)); - TL.setRParenLoc(ReadSourceLocation(Record, Idx)); + TL.setTypeofLoc(ReadSourceLocation()); + TL.setLParenLoc(ReadSourceLocation()); + TL.setRParenLoc(ReadSourceLocation()); } void TypeLocReader::VisitTypeOfTypeLoc(TypeOfTypeLoc TL) { - TL.setTypeofLoc(ReadSourceLocation(Record, Idx)); - TL.setLParenLoc(ReadSourceLocation(Record, Idx)); - TL.setRParenLoc(ReadSourceLocation(Record, 
Idx)); - TL.setUnderlyingTInfo(Reader.GetTypeSourceInfo(F, Record, Idx)); + TL.setTypeofLoc(ReadSourceLocation()); + TL.setLParenLoc(ReadSourceLocation()); + TL.setRParenLoc(ReadSourceLocation()); + TL.setUnderlyingTInfo(GetTypeSourceInfo()); } void TypeLocReader::VisitDecltypeTypeLoc(DecltypeTypeLoc TL) { - TL.setNameLoc(ReadSourceLocation(Record, Idx)); + TL.setNameLoc(ReadSourceLocation()); } void TypeLocReader::VisitUnaryTransformTypeLoc(UnaryTransformTypeLoc TL) { - TL.setKWLoc(ReadSourceLocation(Record, Idx)); - TL.setLParenLoc(ReadSourceLocation(Record, Idx)); - TL.setRParenLoc(ReadSourceLocation(Record, Idx)); - TL.setUnderlyingTInfo(Reader.GetTypeSourceInfo(F, Record, Idx)); + TL.setKWLoc(ReadSourceLocation()); + TL.setLParenLoc(ReadSourceLocation()); + TL.setRParenLoc(ReadSourceLocation()); + TL.setUnderlyingTInfo(GetTypeSourceInfo()); } void TypeLocReader::VisitAutoTypeLoc(AutoTypeLoc TL) { - TL.setNameLoc(ReadSourceLocation(Record, Idx)); + TL.setNameLoc(ReadSourceLocation()); +} + +void TypeLocReader::VisitDeducedTemplateSpecializationTypeLoc( + DeducedTemplateSpecializationTypeLoc TL) { + TL.setTemplateNameLoc(ReadSourceLocation()); } void TypeLocReader::VisitRecordTypeLoc(RecordTypeLoc TL) { - TL.setNameLoc(ReadSourceLocation(Record, Idx)); + TL.setNameLoc(ReadSourceLocation()); } void TypeLocReader::VisitEnumTypeLoc(EnumTypeLoc TL) { - TL.setNameLoc(ReadSourceLocation(Record, Idx)); + TL.setNameLoc(ReadSourceLocation()); } void TypeLocReader::VisitAttributedTypeLoc(AttributedTypeLoc TL) { - TL.setAttrNameLoc(ReadSourceLocation(Record, Idx)); + TL.setAttrNameLoc(ReadSourceLocation()); if (TL.hasAttrOperand()) { SourceRange range; - range.setBegin(ReadSourceLocation(Record, Idx)); - range.setEnd(ReadSourceLocation(Record, Idx)); + range.setBegin(ReadSourceLocation()); + range.setEnd(ReadSourceLocation()); TL.setAttrOperandParensRange(range); } if (TL.hasAttrExprOperand()) { if (Record[Idx++]) - TL.setAttrExprOperand(Reader.ReadExpr(F)); + TL.setAttrExprOperand(Reader->ReadExpr(*F)); else TL.setAttrExprOperand(nullptr); } else if (TL.hasAttrEnumOperand()) - TL.setAttrEnumOperandLoc(ReadSourceLocation(Record, Idx)); + TL.setAttrEnumOperandLoc(ReadSourceLocation()); } void TypeLocReader::VisitTemplateTypeParmTypeLoc(TemplateTypeParmTypeLoc TL) { - TL.setNameLoc(ReadSourceLocation(Record, Idx)); + TL.setNameLoc(ReadSourceLocation()); } void TypeLocReader::VisitSubstTemplateTypeParmTypeLoc( SubstTemplateTypeParmTypeLoc TL) { - TL.setNameLoc(ReadSourceLocation(Record, Idx)); + TL.setNameLoc(ReadSourceLocation()); } void TypeLocReader::VisitSubstTemplateTypeParmPackTypeLoc( SubstTemplateTypeParmPackTypeLoc TL) { - TL.setNameLoc(ReadSourceLocation(Record, Idx)); + TL.setNameLoc(ReadSourceLocation()); } void TypeLocReader::VisitTemplateSpecializationTypeLoc( TemplateSpecializationTypeLoc TL) { - TL.setTemplateKeywordLoc(ReadSourceLocation(Record, Idx)); - TL.setTemplateNameLoc(ReadSourceLocation(Record, Idx)); - TL.setLAngleLoc(ReadSourceLocation(Record, Idx)); - TL.setRAngleLoc(ReadSourceLocation(Record, Idx)); + TL.setTemplateKeywordLoc(ReadSourceLocation()); + TL.setTemplateNameLoc(ReadSourceLocation()); + TL.setLAngleLoc(ReadSourceLocation()); + TL.setRAngleLoc(ReadSourceLocation()); for (unsigned i = 0, e = TL.getNumArgs(); i != e; ++i) - TL.setArgLocInfo(i, - Reader.GetTemplateArgumentLocInfo(F, - TL.getTypePtr()->getArg(i).getKind(), - Record, Idx)); + TL.setArgLocInfo( + i, + Reader->GetTemplateArgumentLocInfo( + *F, TL.getTypePtr()->getArg(i).getKind(), Record, Idx)); } void 
TypeLocReader::VisitParenTypeLoc(ParenTypeLoc TL) { - TL.setLParenLoc(ReadSourceLocation(Record, Idx)); - TL.setRParenLoc(ReadSourceLocation(Record, Idx)); + TL.setLParenLoc(ReadSourceLocation()); + TL.setRParenLoc(ReadSourceLocation()); } void TypeLocReader::VisitElaboratedTypeLoc(ElaboratedTypeLoc TL) { - TL.setElaboratedKeywordLoc(ReadSourceLocation(Record, Idx)); - TL.setQualifierLoc(Reader.ReadNestedNameSpecifierLoc(F, Record, Idx)); + TL.setElaboratedKeywordLoc(ReadSourceLocation()); + TL.setQualifierLoc(ReadNestedNameSpecifierLoc()); } void TypeLocReader::VisitInjectedClassNameTypeLoc(InjectedClassNameTypeLoc TL) { - TL.setNameLoc(ReadSourceLocation(Record, Idx)); + TL.setNameLoc(ReadSourceLocation()); } void TypeLocReader::VisitDependentNameTypeLoc(DependentNameTypeLoc TL) { - TL.setElaboratedKeywordLoc(ReadSourceLocation(Record, Idx)); - TL.setQualifierLoc(Reader.ReadNestedNameSpecifierLoc(F, Record, Idx)); - TL.setNameLoc(ReadSourceLocation(Record, Idx)); + TL.setElaboratedKeywordLoc(ReadSourceLocation()); + TL.setQualifierLoc(ReadNestedNameSpecifierLoc()); + TL.setNameLoc(ReadSourceLocation()); } void TypeLocReader::VisitDependentTemplateSpecializationTypeLoc( DependentTemplateSpecializationTypeLoc TL) { - TL.setElaboratedKeywordLoc(ReadSourceLocation(Record, Idx)); - TL.setQualifierLoc(Reader.ReadNestedNameSpecifierLoc(F, Record, Idx)); - TL.setTemplateKeywordLoc(ReadSourceLocation(Record, Idx)); - TL.setTemplateNameLoc(ReadSourceLocation(Record, Idx)); - TL.setLAngleLoc(ReadSourceLocation(Record, Idx)); - TL.setRAngleLoc(ReadSourceLocation(Record, Idx)); + TL.setElaboratedKeywordLoc(ReadSourceLocation()); + TL.setQualifierLoc(ReadNestedNameSpecifierLoc()); + TL.setTemplateKeywordLoc(ReadSourceLocation()); + TL.setTemplateNameLoc(ReadSourceLocation()); + TL.setLAngleLoc(ReadSourceLocation()); + TL.setRAngleLoc(ReadSourceLocation()); for (unsigned I = 0, E = TL.getNumArgs(); I != E; ++I) - TL.setArgLocInfo(I, - Reader.GetTemplateArgumentLocInfo(F, - TL.getTypePtr()->getArg(I).getKind(), - Record, Idx)); + TL.setArgLocInfo( + I, + Reader->GetTemplateArgumentLocInfo( + *F, TL.getTypePtr()->getArg(I).getKind(), Record, Idx)); } void TypeLocReader::VisitPackExpansionTypeLoc(PackExpansionTypeLoc TL) { - TL.setEllipsisLoc(ReadSourceLocation(Record, Idx)); + TL.setEllipsisLoc(ReadSourceLocation()); } void TypeLocReader::VisitObjCInterfaceTypeLoc(ObjCInterfaceTypeLoc TL) { - TL.setNameLoc(ReadSourceLocation(Record, Idx)); + TL.setNameLoc(ReadSourceLocation()); } void TypeLocReader::VisitObjCTypeParamTypeLoc(ObjCTypeParamTypeLoc TL) { if (TL.getNumProtocols()) { - TL.setProtocolLAngleLoc(ReadSourceLocation(Record, Idx)); - TL.setProtocolRAngleLoc(ReadSourceLocation(Record, Idx)); + TL.setProtocolLAngleLoc(ReadSourceLocation()); + TL.setProtocolRAngleLoc(ReadSourceLocation()); } for (unsigned i = 0, e = TL.getNumProtocols(); i != e; ++i) - TL.setProtocolLoc(i, ReadSourceLocation(Record, Idx)); + TL.setProtocolLoc(i, ReadSourceLocation()); } void TypeLocReader::VisitObjCObjectTypeLoc(ObjCObjectTypeLoc TL) { TL.setHasBaseTypeAsWritten(Record[Idx++]); - TL.setTypeArgsLAngleLoc(ReadSourceLocation(Record, Idx)); - TL.setTypeArgsRAngleLoc(ReadSourceLocation(Record, Idx)); + TL.setTypeArgsLAngleLoc(ReadSourceLocation()); + TL.setTypeArgsRAngleLoc(ReadSourceLocation()); for (unsigned i = 0, e = TL.getNumTypeArgs(); i != e; ++i) - TL.setTypeArgTInfo(i, Reader.GetTypeSourceInfo(F, Record, Idx)); - TL.setProtocolLAngleLoc(ReadSourceLocation(Record, Idx)); - 
TL.setProtocolRAngleLoc(ReadSourceLocation(Record, Idx)); + TL.setTypeArgTInfo(i, GetTypeSourceInfo()); + TL.setProtocolLAngleLoc(ReadSourceLocation()); + TL.setProtocolRAngleLoc(ReadSourceLocation()); for (unsigned i = 0, e = TL.getNumProtocols(); i != e; ++i) - TL.setProtocolLoc(i, ReadSourceLocation(Record, Idx)); + TL.setProtocolLoc(i, ReadSourceLocation()); } void TypeLocReader::VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc TL) { - TL.setStarLoc(ReadSourceLocation(Record, Idx)); + TL.setStarLoc(ReadSourceLocation()); } void TypeLocReader::VisitAtomicTypeLoc(AtomicTypeLoc TL) { - TL.setKWLoc(ReadSourceLocation(Record, Idx)); - TL.setLParenLoc(ReadSourceLocation(Record, Idx)); - TL.setRParenLoc(ReadSourceLocation(Record, Idx)); + TL.setKWLoc(ReadSourceLocation()); + TL.setLParenLoc(ReadSourceLocation()); + TL.setRParenLoc(ReadSourceLocation()); } void TypeLocReader::VisitPipeTypeLoc(PipeTypeLoc TL) { - TL.setKWLoc(ReadSourceLocation(Record, Idx)); + TL.setKWLoc(ReadSourceLocation()); } -TypeSourceInfo *ASTReader::GetTypeSourceInfo(ModuleFile &F, - const RecordData &Record, - unsigned &Idx) { +TypeSourceInfo * +ASTReader::GetTypeSourceInfo(ModuleFile &F, const ASTReader::RecordData &Record, + unsigned &Idx) { QualType InfoTy = readType(F, Record, Idx); if (InfoTy.isNull()) return nullptr; TypeSourceInfo *TInfo = getContext().CreateTypeSourceInfo(InfoTy); - TypeLocReader TLR(*this, F, Record, Idx); + TypeLocReader TLR(F, *this, Record, Idx); for (TypeLoc TL = TInfo->getTypeLoc(); !TL.isNull(); TL = TL.getNextTypeLoc()) TLR.Visit(TL); return TInfo; @@ -6277,9 +6396,6 @@ QualType ASTReader::GetType(TypeID ID) { case PREDEF_TYPE_QUEUE_ID: T = Context.OCLQueueTy; break; - case PREDEF_TYPE_NDRANGE_ID: - T = Context.OCLNDRangeTy; - break; case PREDEF_TYPE_RESERVE_ID_ID: T = Context.OCLReserveIDTy; break; @@ -6328,18 +6444,21 @@ QualType ASTReader::getLocalType(ModuleFile &F, unsigned LocalID) { return GetType(getGlobalTypeID(F, LocalID)); } -serialization::TypeID +serialization::TypeID ASTReader::getGlobalTypeID(ModuleFile &F, unsigned LocalID) const { unsigned FastQuals = LocalID & Qualifiers::FastMask; unsigned LocalIndex = LocalID >> Qualifiers::FastWidth; - + if (LocalIndex < NUM_PREDEF_TYPE_IDS) return LocalID; + if (!F.ModuleOffsetMap.empty()) + ReadModuleOffsetMap(F); + ContinuousRangeMap::iterator I = F.TypeRemap.find(LocalIndex - NUM_PREDEF_TYPE_IDS); assert(I != F.TypeRemap.end() && "Invalid index into type index remap"); - + unsigned GlobalIndex = LocalIndex + I->second; return (GlobalIndex << Qualifiers::FastWidth) | FastQuals; } @@ -6355,18 +6474,18 @@ ASTReader::GetTemplateArgumentLocInfo(ModuleFile &F, case TemplateArgument::Type: return GetTypeSourceInfo(F, Record, Index); case TemplateArgument::Template: { - NestedNameSpecifierLoc QualifierLoc = ReadNestedNameSpecifierLoc(F, Record, + NestedNameSpecifierLoc QualifierLoc = ReadNestedNameSpecifierLoc(F, Record, Index); SourceLocation TemplateNameLoc = ReadSourceLocation(F, Record, Index); return TemplateArgumentLocInfo(QualifierLoc, TemplateNameLoc, SourceLocation()); } case TemplateArgument::TemplateExpansion: { - NestedNameSpecifierLoc QualifierLoc = ReadNestedNameSpecifierLoc(F, Record, + NestedNameSpecifierLoc QualifierLoc = ReadNestedNameSpecifierLoc(F, Record, Index); SourceLocation TemplateNameLoc = ReadSourceLocation(F, Record, Index); SourceLocation EllipsisLoc = ReadSourceLocation(F, Record, Index); - return TemplateArgumentLocInfo(QualifierLoc, TemplateNameLoc, + return TemplateArgumentLocInfo(QualifierLoc, 
TemplateNameLoc, EllipsisLoc); } case TemplateArgument::Null: @@ -6410,12 +6529,6 @@ Decl *ASTReader::GetExternalDecl(uint32_t ID) { return GetDecl(ID); } -template -static void completeRedeclChainForTemplateSpecialization(Decl *D) { - if (auto *TSD = dyn_cast(D)) - TSD->getSpecializedTemplate()->LoadLazySpecializations(); -} - void ASTReader::CompleteRedeclChain(const Decl *D) { if (NumCurrentElementsDeserializing) { // We arrange to not care about the complete redeclaration chain while we're @@ -6511,15 +6624,18 @@ CXXBaseSpecifier *ASTReader::GetExternalCXXBaseSpecifiers(uint64_t Offset) { return Bases; } -serialization::DeclID +serialization::DeclID ASTReader::getGlobalDeclID(ModuleFile &F, LocalDeclID LocalID) const { if (LocalID < NUM_PREDEF_DECL_IDS) return LocalID; + if (!F.ModuleOffsetMap.empty()) + ReadModuleOffsetMap(F); + ContinuousRangeMap::iterator I = F.DeclRemap.find(LocalID - NUM_PREDEF_DECL_IDS); assert(I != F.DeclRemap.end() && "Invalid index into decl index remap"); - + return LocalID + I->second; } @@ -6529,7 +6645,7 @@ bool ASTReader::isDeclIDFromModule(serialization::GlobalDeclID ID, if (ID < NUM_PREDEF_DECL_IDS) return false; - return ID - NUM_PREDEF_DECL_IDS >= M.BaseDeclID && + return ID - NUM_PREDEF_DECL_IDS >= M.BaseDeclID && ID - NUM_PREDEF_DECL_IDS < M.BaseDeclID + M.LocalNumDecls; } @@ -6667,11 +6783,11 @@ Decl *ASTReader::GetDecl(DeclID ID) { return DeclsLoaded[Index]; } -DeclID ASTReader::mapGlobalIDToModuleFileGlobalID(ModuleFile &M, +DeclID ASTReader::mapGlobalIDToModuleFileGlobalID(ModuleFile &M, DeclID GlobalID) { if (GlobalID < NUM_PREDEF_DECL_IDS) return GlobalID; - + GlobalDeclMapType::const_iterator I = GlobalDeclMap.find(GlobalID); assert(I != GlobalDeclMap.end() && "Corrupted global declaration map"); ModuleFile *Owner = I->second; @@ -6680,18 +6796,18 @@ DeclID ASTReader::mapGlobalIDToModuleFileGlobalID(ModuleFile &M, = M.GlobalToLocalDeclIDs.find(Owner); if (Pos == M.GlobalToLocalDeclIDs.end()) return 0; - + return GlobalID - Owner->BaseDeclID + Pos->second; } -serialization::DeclID ASTReader::ReadDeclID(ModuleFile &F, +serialization::DeclID ASTReader::ReadDeclID(ModuleFile &F, const RecordData &Record, unsigned &Idx) { if (Idx >= Record.size()) { Error("Corrupted AST file"); return 0; } - + return getGlobalDeclID(F, Record[Idx++]); } @@ -6707,6 +6823,9 @@ Stmt *ASTReader::GetExternalDeclStmt(uint64_t Offset) { // Offset here is a global offset across the entire chain. 
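// [Editor's note -- illustration only, not part of the patch. The new
// assert-plus-guard added below relies on the reader's reentrancy counter:
// Deserializing is an RAII helper that bumps NumCurrentElementsDeserializing
// for the duration of a read, so deferred fixups run only when the outermost
// read completes. Its rough shape, as I understand it:]
struct DeserializingSketch {
  unsigned &Depth;
  explicit DeserializingSketch(unsigned &Depth) : Depth(Depth) { ++Depth; }
  ~DeserializingSketch() {
    if (--Depth == 0) {
      // Outermost read finished: safe to run deferred work (e.g. completing
      // redeclaration chains) without observing half-read decls.
    }
  }
};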
RecordLocation Loc = getLocalBitOffset(Offset); Loc.F->DeclsCursor.JumpToBit(Loc.Offset); + assert(NumCurrentElementsDeserializing == 0 && + "should not be called while already deserializing"); + Deserializing D(this); return ReadStmtFromStream(*Loc.F); } @@ -6823,7 +6942,7 @@ void ASTReader::FindFileRegionDecls(FileID File, EndLoc, DIDComp); if (EndIt != DInfo.Decls.end()) ++EndIt; - + for (ArrayRef::iterator DIt = BeginIt; DIt != EndIt; ++DIt) Decls.push_back(GetDecl(getGlobalDeclID(*DInfo.Mod, *DIt))); @@ -7034,23 +7153,23 @@ void ASTReader::PrintStats() { std::fprintf(stderr, "\n"); GlobalIndex->printStats(); } - + std::fprintf(stderr, "\n"); dump(); std::fprintf(stderr, "\n"); } template -static void +LLVM_DUMP_METHOD static void dumpModuleIDMap(StringRef Name, - const ContinuousRangeMap &Map) { if (Map.begin() == Map.end()) return; - + typedef ContinuousRangeMap MapType; llvm::errs() << Name << ":\n"; - for (typename MapType::const_iterator I = Map.begin(), IEnd = Map.end(); + for (typename MapType::const_iterator I = Map.begin(), IEnd = Map.end(); I != IEnd; ++I) { llvm::errs() << " " << I->first << " -> " << I->second->FileName << "\n"; @@ -7067,22 +7186,19 @@ LLVM_DUMP_METHOD void ASTReader::dump() { dumpModuleIDMap("Global macro map", GlobalMacroMap); dumpModuleIDMap("Global submodule map", GlobalSubmoduleMap); dumpModuleIDMap("Global selector map", GlobalSelectorMap); - dumpModuleIDMap("Global preprocessed entity map", + dumpModuleIDMap("Global preprocessed entity map", GlobalPreprocessedEntityMap); - + llvm::errs() << "\n*** PCH/Modules Loaded:"; - for (ModuleManager::ModuleConstIterator M = ModuleMgr.begin(), - MEnd = ModuleMgr.end(); - M != MEnd; ++M) - (*M)->dump(); + for (ModuleFile &M : ModuleMgr) + M.dump(); } /// Return the amount of memory used by memory buffers, breaking down /// by heap-backed versus mmap'ed memory. void ASTReader::getMemoryBufferSizes(MemoryBufferSizes &sizes) const { - for (ModuleConstIterator I = ModuleMgr.begin(), - E = ModuleMgr.end(); I != E; ++I) { - if (llvm::MemoryBuffer *buf = (*I)->Buffer.get()) { + for (ModuleFile &I : ModuleMgr) { + if (llvm::MemoryBuffer *buf = I.Buffer.get()) { size_t bytes = buf->getBufferSize(); switch (buf->getBufferKind()) { case llvm::MemoryBuffer::MemoryBuffer_Malloc: @@ -7114,14 +7230,9 @@ void ASTReader::InitializeSema(Sema &S) { SemaObj->FPFeatures.fp_contract = FPPragmaOptions[0]; } - // FIXME: What happens if these are changed by a module import? - if (!OpenCLExtensions.empty()) { - unsigned I = 0; -#define OPENCLEXT(nm) SemaObj->OpenCLFeatures.nm = OpenCLExtensions[I++]; -#include "clang/Basic/OpenCLExtensions.def" - - assert(OpenCLExtensions.size() == I && "Wrong number of OPENCL_EXTENSIONS"); - } + SemaObj->OpenCLFeatures.copy(OpenCLExtensions); + SemaObj->OpenCLTypeExtMap = OpenCLTypeExtMap; + SemaObj->OpenCLDeclExtMap = OpenCLDeclExtMap; UpdateSema(); } @@ -7321,7 +7432,7 @@ namespace serialization { bool operator()(ModuleFile &M) { if (!M.SelectorLookupTable) return false; - + // If we've already searched this module file, skip it now. if (M.Generation <= PriorGeneration) return true; @@ -7351,14 +7462,14 @@ namespace serialization { FactoryHasMoreThanOneDecl = Data.FactoryHasMoreThanOneDecl; return true; } - + /// \brief Retrieve the instance methods found by this visitor. - ArrayRef getInstanceMethods() const { - return InstanceMethods; + ArrayRef getInstanceMethods() const { + return InstanceMethods; } /// \brief Retrieve the instance methods found by this visitor. 
- ArrayRef getFactoryMethods() const { + ArrayRef getFactoryMethods() const { return FactoryMethods; } @@ -7380,14 +7491,14 @@ static void addMethodsToPool(Sema &S, ArrayRef Methods, S.addMethodToGlobalList(&List, Methods[I]); } } - + void ASTReader::ReadMethodPool(Selector Sel) { // Get the selector generation and update it to the current generation. unsigned &Generation = SelectorGeneration[Sel]; unsigned PriorGeneration = Generation; Generation = getGeneration(); SelectorOutOfDate[Sel] = false; - + // Search for methods defined with this selector. ++NumMethodPoolLookups; ReadMethodPoolVisitor Visitor(*this, Sel, PriorGeneration); @@ -7401,7 +7512,7 @@ void ASTReader::ReadMethodPool(Selector Sel) { if (!getSema()) return; - + Sema &S = *getSema(); Sema::GlobalMethodPool::iterator Pos = S.MethodPool.insert(std::make_pair(Sel, Sema::GlobalMethods())).first; @@ -7426,9 +7537,9 @@ void ASTReader::updateOutOfDateSelector(Selector Sel) { void ASTReader::ReadKnownNamespaces( SmallVectorImpl &Namespaces) { Namespaces.clear(); - + for (unsigned I = 0, N = KnownNamespaces.size(); I != N; ++I) { - if (NamespaceDecl *Namespace + if (NamespaceDecl *Namespace = dyn_cast_or_null(GetDecl(KnownNamespaces[I]))) Namespaces.push_back(Namespace); } @@ -7517,7 +7628,7 @@ void ASTReader::ReadReferencedSelectors( SmallVectorImpl > &Sels) { if (ReferencedSelectorsData.empty()) return; - + // If there are @selector references added them to its pool. This is for // implementation of -Wselector. unsigned int DataSize = ReferencedSelectorsData.size()-1; @@ -7537,9 +7648,9 @@ void ASTReader::ReadWeakUndeclaredIdentifiers( return; for (unsigned I = 0, N = WeakUndeclaredIdentifiers.size(); I < N; /*none*/) { - IdentifierInfo *WeakId + IdentifierInfo *WeakId = DecodeIdentifierInfo(WeakUndeclaredIdentifiers[I++]); - IdentifierInfo *AliasId + IdentifierInfo *AliasId = DecodeIdentifierInfo(WeakUndeclaredIdentifiers[I++]); SourceLocation Loc = SourceLocation::getFromRawEncoding(WeakUndeclaredIdentifiers[I++]); @@ -7559,7 +7670,7 @@ void ASTReader::ReadUsedVTables(SmallVectorImpl &VTables) { VT.DefinitionRequired = VTableUses[Idx++]; VTables.push_back(VT); } - + VTableUses.clear(); } @@ -7571,7 +7682,7 @@ void ASTReader::ReadPendingInstantiations( = SourceLocation::getFromRawEncoding(PendingInstantiations[Idx++]); Pending.push_back(std::make_pair(D, Loc)); - } + } PendingInstantiations.clear(); } @@ -7703,12 +7814,15 @@ IdentifierInfo *ASTReader::getLocalIdentifier(ModuleFile &M, unsigned LocalID) { IdentifierID ASTReader::getGlobalIdentifierID(ModuleFile &M, unsigned LocalID) { if (LocalID < NUM_PREDEF_IDENT_IDS) return LocalID; - + + if (!M.ModuleOffsetMap.empty()) + ReadModuleOffsetMap(M); + ContinuousRangeMap::iterator I = M.IdentifierRemap.find(LocalID - NUM_PREDEF_IDENT_IDS); - assert(I != M.IdentifierRemap.end() + assert(I != M.IdentifierRemap.end() && "Invalid index into identifier index remap"); - + return LocalID + I->second; } @@ -7729,7 +7843,7 @@ MacroInfo *ASTReader::getMacro(MacroID ID) { ModuleFile *M = I->second; unsigned Index = ID - M->BaseMacroID; MacrosLoaded[ID] = ReadMacroRecord(*M, M->MacroOffsets[Index]); - + if (DeserializationListener) DeserializationListener->MacroRead(ID + NUM_PREDEF_MACRO_IDS, MacrosLoaded[ID]); @@ -7742,6 +7856,9 @@ MacroID ASTReader::getGlobalMacroID(ModuleFile &M, unsigned LocalID) { if (LocalID < NUM_PREDEF_MACRO_IDS) return LocalID; + if (!M.ModuleOffsetMap.empty()) + ReadModuleOffsetMap(M); + ContinuousRangeMap::iterator I = M.MacroRemap.find(LocalID - 
NUM_PREDEF_MACRO_IDS); assert(I != M.MacroRemap.end() && "Invalid index into macro index remap"); @@ -7753,12 +7870,15 @@ serialization::SubmoduleID ASTReader::getGlobalSubmoduleID(ModuleFile &M, unsigned LocalID) { if (LocalID < NUM_PREDEF_SUBMODULE_IDS) return LocalID; - + + if (!M.ModuleOffsetMap.empty()) + ReadModuleOffsetMap(M); + ContinuousRangeMap::iterator I = M.SubmoduleRemap.find(LocalID - NUM_PREDEF_SUBMODULE_IDS); - assert(I != M.SubmoduleRemap.end() + assert(I != M.SubmoduleRemap.end() && "Invalid index into submodule index remap"); - + return LocalID + I->second; } @@ -7767,12 +7887,12 @@ Module *ASTReader::getSubmodule(SubmoduleID GlobalID) { assert(GlobalID == 0 && "Unhandled global submodule ID"); return nullptr; } - + if (GlobalID > SubmodulesLoaded.size()) { Error("submodule ID out of range in AST file"); return nullptr; } - + return SubmodulesLoaded[GlobalID - NUM_PREDEF_SUBMODULE_IDS]; } @@ -7817,7 +7937,8 @@ ASTReader::getSourceDescriptor(unsigned ID) { // If there is only a single PCH, return it instead. // Chained PCH are not suported. - if (ModuleMgr.size() == 1) { + const auto &PCHChain = ModuleMgr.pch_modules(); + if (std::distance(std::begin(PCHChain), std::end(PCHChain))) { ModuleFile &MF = ModuleMgr.getPrimaryModule(); StringRef ModuleName = llvm::sys::path::filename(MF.OriginalSourceFileName); StringRef FileName = llvm::sys::path::filename(MF.FileName); @@ -7827,6 +7948,18 @@ ASTReader::getSourceDescriptor(unsigned ID) { return None; } +ExternalASTSource::ExtKind ASTReader::hasExternalDefinitions(unsigned ID) { + const Module *M = getSubmodule(ID); + if (!M || !M->WithCodegen) + return EK_ReplyHazy; + + ModuleFile *MF = ModuleMgr.lookup(M->getASTFile()); + assert(MF); // ? + if (MF->Kind == ModuleKind::MK_MainFile) + return EK_Never; + return EK_Always; +} + Selector ASTReader::getLocalSelector(ModuleFile &M, unsigned LocalID) { return DecodeSelector(getGlobalSelectorID(M, LocalID)); } @@ -7869,17 +8002,20 @@ serialization::SelectorID ASTReader::getGlobalSelectorID(ModuleFile &M, unsigned LocalID) const { if (LocalID < NUM_PREDEF_SELECTOR_IDS) return LocalID; - + + if (!M.ModuleOffsetMap.empty()) + ReadModuleOffsetMap(M); + ContinuousRangeMap::iterator I = M.SelectorRemap.find(LocalID - NUM_PREDEF_SELECTOR_IDS); - assert(I != M.SelectorRemap.end() + assert(I != M.SelectorRemap.end() && "Invalid index into selector index remap"); - + return LocalID + I->second; } DeclarationName -ASTReader::ReadDeclarationName(ModuleFile &F, +ASTReader::ReadDeclarationName(ModuleFile &F, const RecordData &Record, unsigned &Idx) { DeclarationName::NameKind Kind = (DeclarationName::NameKind)Record[Idx++]; switch (Kind) { @@ -7899,6 +8035,10 @@ ASTReader::ReadDeclarationName(ModuleFile &F, return Context.DeclarationNames.getCXXDestructorName( Context.getCanonicalType(readType(F, Record, Idx))); + case DeclarationName::CXXDeductionGuideName: + return Context.DeclarationNames.getCXXDeductionGuideName( + ReadDeclAs(F, Record, Idx)); + case DeclarationName::CXXConversionFunctionName: return Context.DeclarationNames.getCXXConversionFunctionName( Context.getCanonicalType(readType(F, Record, Idx))); @@ -7946,6 +8086,7 @@ void ASTReader::ReadDeclarationNameLoc(ModuleFile &F, case DeclarationName::ObjCOneArgSelector: case DeclarationName::ObjCMultiArgSelector: case DeclarationName::CXXUsingDirective: + case DeclarationName::CXXDeductionGuideName: break; } } @@ -7973,7 +8114,7 @@ void ASTReader::ReadQualifierInfo(ModuleFile &F, QualifierInfo &Info, } TemplateName 
-ASTReader::ReadTemplateName(ModuleFile &F, const RecordData &Record, +ASTReader::ReadTemplateName(ModuleFile &F, const RecordData &Record, unsigned &Idx) { TemplateName::NameKind Kind = (TemplateName::NameKind)Record[Idx++]; switch (Kind) { @@ -8000,7 +8141,7 @@ ASTReader::ReadTemplateName(ModuleFile &F, const RecordData &Record, NestedNameSpecifier *NNS = ReadNestedNameSpecifier(F, Record, Idx); if (Record[Idx++]) // isIdentifier return Context.getDependentTemplateName(NNS, - GetIdentifierInfo(F, Record, + GetIdentifierInfo(F, Record, Idx)); return Context.getDependentTemplateName(NNS, (OverloadedOperatorKind)Record[Idx++]); @@ -8013,17 +8154,17 @@ ASTReader::ReadTemplateName(ModuleFile &F, const RecordData &Record, TemplateName replacement = ReadTemplateName(F, Record, Idx); return Context.getSubstTemplateTemplateParm(param, replacement); } - + case TemplateName::SubstTemplateTemplateParmPack: { - TemplateTemplateParmDecl *Param + TemplateTemplateParmDecl *Param = ReadDeclAs(F, Record, Idx); if (!Param) return TemplateName(); - + TemplateArgument ArgPack = ReadTemplateArgument(F, Record, Idx); if (ArgPack.getKind() != TemplateArgument::Pack) return TemplateName(); - + return Context.getSubstTemplateTemplateParmPack(Param, ArgPack); } } @@ -8061,7 +8202,7 @@ TemplateArgument ASTReader::ReadTemplateArgument(ModuleFile &F, QualType T = readType(F, Record, Idx); return TemplateArgument(Context, Value, T); } - case TemplateArgument::Template: + case TemplateArgument::Template: return TemplateArgument(ReadTemplateName(F, Record, Idx)); case TemplateArgument::TemplateExpansion: { TemplateName Name = ReadTemplateName(F, Record, Idx); @@ -8137,7 +8278,7 @@ ASTReader::ReadCXXBaseSpecifier(ModuleFile &F, TypeSourceInfo *TInfo = GetTypeSourceInfo(F, Record, Idx); SourceRange Range = ReadSourceRange(F, Record, Idx); SourceLocation EllipsisLoc = ReadSourceLocation(F, Record, Idx); - CXXBaseSpecifier Result(Range, isVirtual, isBaseOfClass, AS, TInfo, + CXXBaseSpecifier Result(Range, isVirtual, isBaseOfClass, AS, TInfo, EllipsisLoc); Result.setInheritConstructors(inheritConstructors); return Result; @@ -8264,7 +8405,7 @@ ASTReader::ReadNestedNameSpecifier(ModuleFile &F, } NestedNameSpecifierLoc -ASTReader::ReadNestedNameSpecifierLoc(ModuleFile &F, const RecordData &Record, +ASTReader::ReadNestedNameSpecifierLoc(ModuleFile &F, const RecordData &Record, unsigned &Idx) { unsigned N = Record[Idx++]; NestedNameSpecifierLocBuilder Builder; @@ -8273,7 +8414,7 @@ ASTReader::ReadNestedNameSpecifierLoc(ModuleFile &F, const RecordData &Record, = (NestedNameSpecifier::SpecifierKind)Record[Idx++]; switch (Kind) { case NestedNameSpecifier::Identifier: { - IdentifierInfo *II = GetIdentifierInfo(F, Record, Idx); + IdentifierInfo *II = GetIdentifierInfo(F, Record, Idx); SourceRange Range = ReadSourceRange(F, Record, Idx); Builder.Extend(Context, II, Range.getBegin(), Range.getEnd()); break; @@ -8302,7 +8443,7 @@ ASTReader::ReadNestedNameSpecifierLoc(ModuleFile &F, const RecordData &Record, SourceLocation ColonColonLoc = ReadSourceLocation(F, Record, Idx); // FIXME: 'template' keyword location not saved anywhere, so we fake it. - Builder.Extend(Context, + Builder.Extend(Context, Template? 
T->getTypeLoc().getBeginLoc() : SourceLocation(), T->getTypeLoc(), ColonColonLoc); break; @@ -8371,7 +8512,7 @@ std::string ASTReader::ReadPath(ModuleFile &F, const RecordData &Record, return Filename; } -VersionTuple ASTReader::ReadVersionTuple(const RecordData &Record, +VersionTuple ASTReader::ReadVersionTuple(const RecordData &Record, unsigned &Idx) { unsigned Major = Record[Idx++]; unsigned Minor = Record[Idx++]; @@ -8383,18 +8524,18 @@ VersionTuple ASTReader::ReadVersionTuple(const RecordData &Record, return VersionTuple(Major, Minor - 1, Subminor - 1); } -CXXTemporary *ASTReader::ReadCXXTemporary(ModuleFile &F, +CXXTemporary *ASTReader::ReadCXXTemporary(ModuleFile &F, const RecordData &Record, unsigned &Idx) { CXXDestructorDecl *Decl = ReadDeclAs(F, Record, Idx); return CXXTemporary::Create(Context, Decl); } -DiagnosticBuilder ASTReader::Diag(unsigned DiagID) { +DiagnosticBuilder ASTReader::Diag(unsigned DiagID) const { return Diag(CurrentImportLoc, DiagID); } -DiagnosticBuilder ASTReader::Diag(SourceLocation Loc, unsigned DiagID) { +DiagnosticBuilder ASTReader::Diag(SourceLocation Loc, unsigned DiagID) const { return Diags.Report(Loc, DiagID); } @@ -8469,10 +8610,29 @@ void ASTReader::ReadComments() { } } NextCursor: + // De-serialized SourceLocations get negative FileIDs for other modules, + // potentially invalidating the original order. Sort it again. + std::sort(Comments.begin(), Comments.end(), + BeforeThanCompare(SourceMgr)); Context.Comments.addDeserializedComments(Comments); } } +void ASTReader::visitInputFiles(serialization::ModuleFile &MF, + bool IncludeSystem, bool Complain, + llvm::function_ref Visitor) { + unsigned NumUserInputs = MF.NumUserInputFiles; + unsigned NumInputs = MF.InputFilesLoaded.size(); + assert(NumUserInputs <= NumInputs); + unsigned N = IncludeSystem ? NumInputs : NumUserInputs; + for (unsigned I = 0; I < N; ++I) { + bool IsSystem = I >= NumUserInputs; + InputFile IF = getInputFile(MF, I+1, Complain); + Visitor(IF, IsSystem); + } +} + std::string ASTReader::getOwningModuleNameForDiagnostic(const Decl *D) { // If we know the owning module, use it. if (Module *M = D->getImportedOwningModule()) @@ -8574,7 +8734,7 @@ void ASTReader::finishPendingActions() { // If we deserialized any C++ or Objective-C class definitions, any // Objective-C protocol definitions, or any redeclarable templates, make sure - // that all redeclarations point to the definitions. Note that this can only + // that all redeclarations point to the definitions. Note that this can only // happen now, after the redeclaration chains have been fully wired. for (Decl *D : PendingDefinitions) { if (TagDecl *TD = dyn_cast(D)) { @@ -8729,7 +8889,7 @@ void ASTReader::diagnoseOdrViolations() { // completed. We only really need to mark FieldDecls as invalid here. if (!isa(D)) D->setInvalidDecl(); - + // Ensure we don't accidentally recursively enter deserialization while // we're producing our diagnostic. Deserializing RecursionGuard(this); @@ -8769,24 +8929,504 @@ void ASTReader::diagnoseOdrViolations() { continue; bool Diagnosed = false; - for (auto *RD : Merge.second) { + CXXRecordDecl *FirstRecord = Merge.first; + std::string FirstModule = getOwningModuleNameForDiagnostic(FirstRecord); + for (CXXRecordDecl *SecondRecord : Merge.second) { // Multiple different declarations got merged together; tell the user // where they came from. - if (Merge.first != RD) { - // FIXME: Walk the definition, figure out what's different, - // and diagnose that. 
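[Editor's note: the rewritten merge loop that begins here replaces the old single catch-all diagnostic. Each merged definition's members are filtered through ODRHash::isWhitelistedDecl and reduced to (Decl*, hash) pairs, so a mismatch can be localized to a specific member instead of merely reported. A toy version of that reduction step, using self-contained stand-ins rather than clang::Decl and ODRHash:]

    #include <cstddef>
    #include <functional>
    #include <string>
    #include <utility>
    #include <vector>

    struct Member {
      std::string Spelling; // stand-in for a member Decl's significant structure
      bool Implicit;        // implicit members are uninteresting for ODR checks
    };

    using MemberHashes = std::vector<std::pair<const Member *, std::size_t>>;

    MemberHashes hashMembers(const std::vector<Member> &Members) {
      MemberHashes Hashes;
      for (const Member &M : Members) {
        if (M.Implicit)
          continue; // mirrors the isWhitelistedDecl filter in the hunk below
        Hashes.emplace_back(&M, std::hash<std::string>{}(M.Spelling));
      }
      return Hashes;
    }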
- if (!Diagnosed) { - std::string Module = getOwningModuleNameForDiagnostic(Merge.first); - Diag(Merge.first->getLocation(), - diag::err_module_odr_violation_different_definitions) - << Merge.first << Module.empty() << Module; - Diagnosed = true; + if (FirstRecord == SecondRecord) + continue; + + std::string SecondModule = getOwningModuleNameForDiagnostic(SecondRecord); + using DeclHashes = llvm::SmallVector, 4>; + DeclHashes FirstHashes; + DeclHashes SecondHashes; + ODRHash Hash; + + auto PopulateHashes = [&Hash, FirstRecord](DeclHashes &Hashes, + CXXRecordDecl *Record) { + for (auto *D : Record->decls()) { + // Due to decl merging, the first CXXRecordDecl is the parent of + // Decls in both records. + if (!ODRHash::isWhitelistedDecl(D, FirstRecord)) + continue; + Hash.clear(); + Hash.AddSubDecl(D); + Hashes.emplace_back(D, Hash.CalculateHash()); } + }; + PopulateHashes(FirstHashes, FirstRecord); + PopulateHashes(SecondHashes, SecondRecord); + + // Used with err_module_odr_violation_mismatch_decl and + // note_module_odr_violation_mismatch_decl + enum { + EndOfClass, + PublicSpecifer, + PrivateSpecifer, + ProtectedSpecifer, + StaticAssert, + Field, + CXXMethod, + Other + } FirstDiffType = Other, + SecondDiffType = Other; + + auto DifferenceSelector = [](Decl *D) { + assert(D && "valid Decl required"); + switch (D->getKind()) { + default: + return Other; + case Decl::AccessSpec: + switch (D->getAccess()) { + case AS_public: + return PublicSpecifer; + case AS_private: + return PrivateSpecifer; + case AS_protected: + return ProtectedSpecifer; + case AS_none: + break; + } + llvm_unreachable("Invalid access specifier"); + case Decl::StaticAssert: + return StaticAssert; + case Decl::Field: + return Field; + case Decl::CXXMethod: + return CXXMethod; + } + }; + + Decl *FirstDecl = nullptr; + Decl *SecondDecl = nullptr; + auto FirstIt = FirstHashes.begin(); + auto SecondIt = SecondHashes.begin(); + + // If there is a diagnoseable difference, FirstDiffType and + // SecondDiffType will not be Other and FirstDecl and SecondDecl will be + // filled in if not EndOfClass. + while (FirstIt != FirstHashes.end() || SecondIt != SecondHashes.end()) { + if (FirstIt != FirstHashes.end() && SecondIt != SecondHashes.end() && + FirstIt->second == SecondIt->second) { + ++FirstIt; + ++SecondIt; + continue; + } + + FirstDecl = FirstIt == FirstHashes.end() ? nullptr : FirstIt->first; + SecondDecl = SecondIt == SecondHashes.end() ? nullptr : SecondIt->first; - Diag(RD->getLocation(), + FirstDiffType = FirstDecl ? DifferenceSelector(FirstDecl) : EndOfClass; + SecondDiffType = + SecondDecl ? DifferenceSelector(SecondDecl) : EndOfClass; + + break; + } + + if (FirstDiffType == Other || SecondDiffType == Other) { + // Reaching this point means an unexpected Decl was encountered + // or no difference was detected. This causes a generic error + // message to be emitted. 
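[Editor's note: in miniature, the lockstep scan above reduces to finding the first index at which the two hash sequences disagree, with "ran out of members" mapping to the EndOfClass case; when even that classification yields Other, the generic diagnostic emitted in the next hunk is the fallback. A compact sketch of the scan, under a hypothetical simplified signature:]

    #include <algorithm>
    #include <cstddef>
    #include <utility>
    #include <vector>

    // Returns the first position where the per-member hashes diverge; a result
    // equal to a sequence's size plays the role of "EndOfClass" above.
    template <typename DeclT>
    std::size_t firstODRMismatch(
        const std::vector<std::pair<DeclT *, unsigned>> &First,
        const std::vector<std::pair<DeclT *, unsigned>> &Second) {
      std::size_t I = 0, E = std::min(First.size(), Second.size());
      while (I != E && First[I].second == Second[I].second)
        ++I;
      return I;
    }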
+ Diag(FirstRecord->getLocation(), + diag::err_module_odr_violation_different_definitions) + << FirstRecord << FirstModule.empty() << FirstModule; + + Diag(SecondRecord->getLocation(), diag::note_module_odr_violation_different_definitions) - << getOwningModuleNameForDiagnostic(RD); + << SecondModule; + Diagnosed = true; + break; + } + + if (FirstDiffType != SecondDiffType) { + SourceLocation FirstLoc; + SourceRange FirstRange; + if (FirstDiffType == EndOfClass) { + FirstLoc = FirstRecord->getBraceRange().getEnd(); + } else { + FirstLoc = FirstIt->first->getLocation(); + FirstRange = FirstIt->first->getSourceRange(); + } + Diag(FirstLoc, diag::err_module_odr_violation_mismatch_decl) + << FirstRecord << FirstModule.empty() << FirstModule << FirstRange + << FirstDiffType; + + SourceLocation SecondLoc; + SourceRange SecondRange; + if (SecondDiffType == EndOfClass) { + SecondLoc = SecondRecord->getBraceRange().getEnd(); + } else { + SecondLoc = SecondDecl->getLocation(); + SecondRange = SecondDecl->getSourceRange(); + } + Diag(SecondLoc, diag::note_module_odr_violation_mismatch_decl) + << SecondModule << SecondRange << SecondDiffType; + Diagnosed = true; + break; } + + assert(FirstDiffType == SecondDiffType); + + // Used with err_module_odr_violation_mismatch_decl_diff and + // note_module_odr_violation_mismatch_decl_diff + enum ODRDeclDifference{ + StaticAssertCondition, + StaticAssertMessage, + StaticAssertOnlyMessage, + FieldName, + FieldTypeName, + FieldSingleBitField, + FieldDifferentWidthBitField, + FieldSingleMutable, + FieldSingleInitializer, + FieldDifferentInitializers, + MethodName, + MethodDeleted, + MethodVirtual, + MethodStatic, + MethodVolatile, + MethodConst, + MethodInline, + }; + + // These lambdas have the common portions of the ODR diagnostics. 
This + // has the same return as Diag(), so addition parameters can be passed + // in with operator<< + auto ODRDiagError = [FirstRecord, &FirstModule, this]( + SourceLocation Loc, SourceRange Range, ODRDeclDifference DiffType) { + return Diag(Loc, diag::err_module_odr_violation_mismatch_decl_diff) + << FirstRecord << FirstModule.empty() << FirstModule << Range + << DiffType; + }; + auto ODRDiagNote = [&SecondModule, this]( + SourceLocation Loc, SourceRange Range, ODRDeclDifference DiffType) { + return Diag(Loc, diag::note_module_odr_violation_mismatch_decl_diff) + << SecondModule << Range << DiffType; + }; + + auto ComputeODRHash = [&Hash](const Stmt* S) { + assert(S); + Hash.clear(); + Hash.AddStmt(S); + return Hash.CalculateHash(); + }; + + auto ComputeDeclNameODRHash = [&Hash](const DeclarationName Name) { + Hash.clear(); + Hash.AddDeclarationName(Name); + return Hash.CalculateHash(); + }; + + switch (FirstDiffType) { + case Other: + case EndOfClass: + case PublicSpecifer: + case PrivateSpecifer: + case ProtectedSpecifer: + llvm_unreachable("Invalid diff type"); + + case StaticAssert: { + StaticAssertDecl *FirstSA = cast(FirstDecl); + StaticAssertDecl *SecondSA = cast(SecondDecl); + + Expr *FirstExpr = FirstSA->getAssertExpr(); + Expr *SecondExpr = SecondSA->getAssertExpr(); + unsigned FirstODRHash = ComputeODRHash(FirstExpr); + unsigned SecondODRHash = ComputeODRHash(SecondExpr); + if (FirstODRHash != SecondODRHash) { + ODRDiagError(FirstExpr->getLocStart(), FirstExpr->getSourceRange(), + StaticAssertCondition); + ODRDiagNote(SecondExpr->getLocStart(), + SecondExpr->getSourceRange(), StaticAssertCondition); + Diagnosed = true; + break; + } + + StringLiteral *FirstStr = FirstSA->getMessage(); + StringLiteral *SecondStr = SecondSA->getMessage(); + assert((FirstStr || SecondStr) && "Both messages cannot be empty"); + if ((FirstStr && !SecondStr) || (!FirstStr && SecondStr)) { + SourceLocation FirstLoc, SecondLoc; + SourceRange FirstRange, SecondRange; + if (FirstStr) { + FirstLoc = FirstStr->getLocStart(); + FirstRange = FirstStr->getSourceRange(); + } else { + FirstLoc = FirstSA->getLocStart(); + FirstRange = FirstSA->getSourceRange(); + } + if (SecondStr) { + SecondLoc = SecondStr->getLocStart(); + SecondRange = SecondStr->getSourceRange(); + } else { + SecondLoc = SecondSA->getLocStart(); + SecondRange = SecondSA->getSourceRange(); + } + ODRDiagError(FirstLoc, FirstRange, StaticAssertOnlyMessage) + << (FirstStr == nullptr); + ODRDiagNote(SecondLoc, SecondRange, StaticAssertOnlyMessage) + << (SecondStr == nullptr); + Diagnosed = true; + break; + } + + if (FirstStr && SecondStr && + FirstStr->getString() != SecondStr->getString()) { + ODRDiagError(FirstStr->getLocStart(), FirstStr->getSourceRange(), + StaticAssertMessage); + ODRDiagNote(SecondStr->getLocStart(), SecondStr->getSourceRange(), + StaticAssertMessage); + Diagnosed = true; + break; + } + break; + } + case Field: { + FieldDecl *FirstField = cast(FirstDecl); + FieldDecl *SecondField = cast(SecondDecl); + IdentifierInfo *FirstII = FirstField->getIdentifier(); + IdentifierInfo *SecondII = SecondField->getIdentifier(); + if (FirstII->getName() != SecondII->getName()) { + ODRDiagError(FirstField->getLocation(), FirstField->getSourceRange(), + FieldName) + << FirstII; + ODRDiagNote(SecondField->getLocation(), SecondField->getSourceRange(), + FieldName) + << SecondII; + + Diagnosed = true; + break; + } + + assert( + Context.hasSameType(FirstField->getType(), SecondField->getType())); + + QualType FirstType = FirstField->getType(); + 
QualType SecondType = SecondField->getType(); + const TypedefType *FirstTypedef = dyn_cast(FirstType); + const TypedefType *SecondTypedef = dyn_cast(SecondType); + + if ((FirstTypedef && !SecondTypedef) || + (!FirstTypedef && SecondTypedef)) { + ODRDiagError(FirstField->getLocation(), FirstField->getSourceRange(), + FieldTypeName) + << FirstII << FirstType; + ODRDiagNote(SecondField->getLocation(), SecondField->getSourceRange(), + FieldTypeName) + << SecondII << SecondType; + + Diagnosed = true; + break; + } + + if (FirstTypedef && SecondTypedef) { + unsigned FirstHash = ComputeDeclNameODRHash( + FirstTypedef->getDecl()->getDeclName()); + unsigned SecondHash = ComputeDeclNameODRHash( + SecondTypedef->getDecl()->getDeclName()); + if (FirstHash != SecondHash) { + ODRDiagError(FirstField->getLocation(), + FirstField->getSourceRange(), FieldTypeName) + << FirstII << FirstType; + ODRDiagNote(SecondField->getLocation(), + SecondField->getSourceRange(), FieldTypeName) + << SecondII << SecondType; + + Diagnosed = true; + break; + } + } + + const bool IsFirstBitField = FirstField->isBitField(); + const bool IsSecondBitField = SecondField->isBitField(); + if (IsFirstBitField != IsSecondBitField) { + ODRDiagError(FirstField->getLocation(), FirstField->getSourceRange(), + FieldSingleBitField) + << FirstII << IsFirstBitField; + ODRDiagNote(SecondField->getLocation(), SecondField->getSourceRange(), + FieldSingleBitField) + << SecondII << IsSecondBitField; + Diagnosed = true; + break; + } + + if (IsFirstBitField && IsSecondBitField) { + ODRDiagError(FirstField->getLocation(), FirstField->getSourceRange(), + FieldDifferentWidthBitField) + << FirstII << FirstField->getBitWidth()->getSourceRange(); + ODRDiagNote(SecondField->getLocation(), SecondField->getSourceRange(), + FieldDifferentWidthBitField) + << SecondII << SecondField->getBitWidth()->getSourceRange(); + Diagnosed = true; + break; + } + + const bool IsFirstMutable = FirstField->isMutable(); + const bool IsSecondMutable = SecondField->isMutable(); + if (IsFirstMutable != IsSecondMutable) { + ODRDiagError(FirstField->getLocation(), FirstField->getSourceRange(), + FieldSingleMutable) + << FirstII << IsFirstMutable; + ODRDiagNote(SecondField->getLocation(), SecondField->getSourceRange(), + FieldSingleMutable) + << SecondII << IsSecondMutable; + Diagnosed = true; + break; + } + + const Expr *FirstInitializer = FirstField->getInClassInitializer(); + const Expr *SecondInitializer = SecondField->getInClassInitializer(); + if ((!FirstInitializer && SecondInitializer) || + (FirstInitializer && !SecondInitializer)) { + ODRDiagError(FirstField->getLocation(), FirstField->getSourceRange(), + FieldSingleInitializer) + << FirstII << (FirstInitializer != nullptr); + ODRDiagNote(SecondField->getLocation(), SecondField->getSourceRange(), + FieldSingleInitializer) + << SecondII << (SecondInitializer != nullptr); + Diagnosed = true; + break; + } + + if (FirstInitializer && SecondInitializer) { + unsigned FirstInitHash = ComputeODRHash(FirstInitializer); + unsigned SecondInitHash = ComputeODRHash(SecondInitializer); + if (FirstInitHash != SecondInitHash) { + ODRDiagError(FirstField->getLocation(), + FirstField->getSourceRange(), + FieldDifferentInitializers) + << FirstII << FirstInitializer->getSourceRange(); + ODRDiagNote(SecondField->getLocation(), + SecondField->getSourceRange(), + FieldDifferentInitializers) + << SecondII << SecondInitializer->getSourceRange(); + Diagnosed = true; + break; + } + } + + break; + } + case CXXMethod: { + const CXXMethodDecl 
*FirstMethod = cast(FirstDecl); + const CXXMethodDecl *SecondMethod = cast(SecondDecl); + auto FirstName = FirstMethod->getDeclName(); + auto SecondName = SecondMethod->getDeclName(); + if (FirstName != SecondName) { + ODRDiagError(FirstMethod->getLocation(), + FirstMethod->getSourceRange(), MethodName) + << FirstName; + ODRDiagNote(SecondMethod->getLocation(), + SecondMethod->getSourceRange(), MethodName) + << SecondName; + + Diagnosed = true; + break; + } + + const bool FirstDeleted = FirstMethod->isDeleted(); + const bool SecondDeleted = SecondMethod->isDeleted(); + if (FirstDeleted != SecondDeleted) { + ODRDiagError(FirstMethod->getLocation(), + FirstMethod->getSourceRange(), MethodDeleted) + << FirstName << FirstDeleted; + + ODRDiagNote(SecondMethod->getLocation(), + SecondMethod->getSourceRange(), MethodDeleted) + << SecondName << SecondDeleted; + Diagnosed = true; + break; + } + + const bool FirstVirtual = FirstMethod->isVirtualAsWritten(); + const bool SecondVirtual = SecondMethod->isVirtualAsWritten(); + const bool FirstPure = FirstMethod->isPure(); + const bool SecondPure = SecondMethod->isPure(); + if ((FirstVirtual || SecondVirtual) && + (FirstVirtual != SecondVirtual || FirstPure != SecondPure)) { + ODRDiagError(FirstMethod->getLocation(), + FirstMethod->getSourceRange(), MethodVirtual) + << FirstName << FirstPure << FirstVirtual; + ODRDiagNote(SecondMethod->getLocation(), + SecondMethod->getSourceRange(), MethodVirtual) + << SecondName << SecondPure << SecondVirtual; + Diagnosed = true; + break; + } + + // CXXMethodDecl::isStatic uses the canonical Decl. With Decl merging, + // FirstDecl is the canonical Decl of SecondDecl, so the storage + // class needs to be checked instead. + const auto FirstStorage = FirstMethod->getStorageClass(); + const auto SecondStorage = SecondMethod->getStorageClass(); + const bool FirstStatic = FirstStorage == SC_Static; + const bool SecondStatic = SecondStorage == SC_Static; + if (FirstStatic != SecondStatic) { + ODRDiagError(FirstMethod->getLocation(), + FirstMethod->getSourceRange(), MethodStatic) + << FirstName << FirstStatic; + ODRDiagNote(SecondMethod->getLocation(), + SecondMethod->getSourceRange(), MethodStatic) + << SecondName << SecondStatic; + Diagnosed = true; + break; + } + + const bool FirstVolatile = FirstMethod->isVolatile(); + const bool SecondVolatile = SecondMethod->isVolatile(); + if (FirstVolatile != SecondVolatile) { + ODRDiagError(FirstMethod->getLocation(), + FirstMethod->getSourceRange(), MethodVolatile) + << FirstName << FirstVolatile; + ODRDiagNote(SecondMethod->getLocation(), + SecondMethod->getSourceRange(), MethodVolatile) + << SecondName << SecondVolatile; + Diagnosed = true; + break; + } + + const bool FirstConst = FirstMethod->isConst(); + const bool SecondConst = SecondMethod->isConst(); + if (FirstConst != SecondConst) { + ODRDiagError(FirstMethod->getLocation(), + FirstMethod->getSourceRange(), MethodConst) + << FirstName << FirstConst; + ODRDiagNote(SecondMethod->getLocation(), + SecondMethod->getSourceRange(), MethodConst) + << SecondName << SecondConst; + Diagnosed = true; + break; + } + + const bool FirstInline = FirstMethod->isInlineSpecified(); + const bool SecondInline = SecondMethod->isInlineSpecified(); + if (FirstInline != SecondInline) { + ODRDiagError(FirstMethod->getLocation(), + FirstMethod->getSourceRange(), MethodInline) + << FirstName << FirstInline; + ODRDiagNote(SecondMethod->getLocation(), + SecondMethod->getSourceRange(), MethodInline) + << SecondName << SecondInline; + Diagnosed = true; + 
break; + } + + break; + } + } + + if (Diagnosed == true) + continue; + + Diag(FirstRecord->getLocation(), + diag::err_module_odr_violation_different_definitions) + << FirstRecord << FirstModule.empty() << FirstModule; + + Diag(SecondRecord->getLocation(), + diag::note_module_odr_violation_different_definitions) + << SecondModule; + Diagnosed = true; } if (!Diagnosed) { @@ -8804,7 +9444,7 @@ void ASTReader::diagnoseOdrViolations() { } void ASTReader::StartedDeserializing() { - if (++NumCurrentElementsDeserializing == 1 && ReadTimer.get()) + if (++NumCurrentElementsDeserializing == 1 && ReadTimer.get()) ReadTimer->startTimer(); } @@ -8872,44 +9512,26 @@ void ASTReader::pushExternalDeclIntoScope(NamedDecl *D, DeclarationName Name) { } } -ASTReader::ASTReader( - Preprocessor &PP, ASTContext &Context, - const PCHContainerReader &PCHContainerRdr, - ArrayRef> Extensions, - StringRef isysroot, bool DisableValidation, - bool AllowASTWithCompilerErrors, - bool AllowConfigurationMismatch, bool ValidateSystemInputs, - bool UseGlobalIndex, - std::unique_ptr ReadTimer) - : Listener(DisableValidation ? - cast(new SimpleASTReaderListener(PP)) : - cast(new PCHValidator(PP, *this))), - DeserializationListener(nullptr), - OwnsDeserializationListener(false), SourceMgr(PP.getSourceManager()), - FileMgr(PP.getFileManager()), PCHContainerRdr(PCHContainerRdr), - Diags(PP.getDiagnostics()), SemaObj(nullptr), PP(PP), Context(Context), - Consumer(nullptr), ModuleMgr(PP.getFileManager(), PCHContainerRdr), - DummyIdResolver(PP), - ReadTimer(std::move(ReadTimer)), - PragmaMSStructState(-1), - PragmaMSPointersToMembersState(-1), - isysroot(isysroot), DisableValidation(DisableValidation), +ASTReader::ASTReader(Preprocessor &PP, ASTContext &Context, + const PCHContainerReader &PCHContainerRdr, + ArrayRef> Extensions, + StringRef isysroot, bool DisableValidation, + bool AllowASTWithCompilerErrors, + bool AllowConfigurationMismatch, bool ValidateSystemInputs, + bool UseGlobalIndex, + std::unique_ptr ReadTimer) + : Listener(DisableValidation + ? 
cast(new SimpleASTReaderListener(PP)) + : cast(new PCHValidator(PP, *this))), + SourceMgr(PP.getSourceManager()), FileMgr(PP.getFileManager()), + PCHContainerRdr(PCHContainerRdr), Diags(PP.getDiagnostics()), PP(PP), + Context(Context), ModuleMgr(PP.getFileManager(), PCHContainerRdr), + DummyIdResolver(PP), ReadTimer(std::move(ReadTimer)), isysroot(isysroot), + DisableValidation(DisableValidation), AllowASTWithCompilerErrors(AllowASTWithCompilerErrors), AllowConfigurationMismatch(AllowConfigurationMismatch), ValidateSystemInputs(ValidateSystemInputs), - UseGlobalIndex(UseGlobalIndex), TriedLoadingGlobalIndex(false), - ProcessingUpdateRecords(false), - CurrSwitchCaseStmts(&SwitchCaseStmts), NumSLocEntriesRead(0), - TotalNumSLocEntries(0), NumStatementsRead(0), TotalNumStatements(0), - NumMacrosRead(0), TotalNumMacros(0), NumIdentifierLookups(0), - NumIdentifierLookupHits(0), NumSelectorsRead(0), - NumMethodPoolEntriesRead(0), NumMethodPoolLookups(0), - NumMethodPoolHits(0), NumMethodPoolTableLookups(0), - NumMethodPoolTableHits(0), TotalNumMethodPoolEntries(0), - NumLexicalDeclContextsRead(0), TotalLexicalDeclContexts(0), - NumVisibleDeclContextsRead(0), TotalVisibleDeclContexts(0), - TotalModulesSizeInBits(0), NumCurrentElementsDeserializing(0), - PassingDeclsToConsumer(false), ReadingKind(Read_None) { + UseGlobalIndex(UseGlobalIndex), CurrSwitchCaseStmts(&SwitchCaseStmts) { SourceMgr.setExternalSLocEntrySource(this); for (const auto &Ext : Extensions) { @@ -8933,3 +9555,10 @@ ASTReader::~ASTReader() { IdentifierResolver &ASTReader::getIdResolver() { return SemaObj ? SemaObj->IdResolver : DummyIdResolver; } + +unsigned ASTRecordReader::readRecord(llvm::BitstreamCursor &Cursor, + unsigned AbbrevID) { + Idx = 0; + Record.clear(); + return Cursor.readRecord(AbbrevID, Record); +} diff --git a/tools/clang/lib/Serialization/ASTWriter.cpp b/tools/clang/lib/Serialization/ASTWriter.cpp index e83c3e4..279a974 100644 --- a/tools/clang/lib/Serialization/ASTWriter.cpp +++ b/tools/clang/lib/Serialization/ASTWriter.cpp @@ -73,6 +73,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Compression.h" #include "llvm/Support/EndianStream.h" +#include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/OnDiskHashTable.h" @@ -348,6 +349,15 @@ void ASTTypeWriter::VisitAutoType(const AutoType *T) { Code = TYPE_AUTO; } +void ASTTypeWriter::VisitDeducedTemplateSpecializationType( + const DeducedTemplateSpecializationType *T) { + Record.AddTemplateName(T->getTemplateName()); + Record.AddTypeRef(T->getDeducedType()); + if (T->getDeducedType().isNull()) + Record.push_back(T->isDependentType()); + Code = TYPE_DEDUCED_TEMPLATE_SPECIALIZATION; +} + void ASTTypeWriter::VisitTagType(const TagType *T) { Record.push_back(T->isDependentType()); Record.AddDeclRef(T->getDecl()->getCanonicalDecl()); @@ -629,6 +639,7 @@ void TypeLocWriter::VisitFunctionTypeLoc(FunctionTypeLoc TL) { Record.AddSourceLocation(TL.getLocalRangeBegin()); Record.AddSourceLocation(TL.getLParenLoc()); Record.AddSourceLocation(TL.getRParenLoc()); + Record.AddSourceRange(TL.getExceptionSpecRange()); Record.AddSourceLocation(TL.getLocalRangeEnd()); for (unsigned i = 0, e = TL.getNumParams(); i != e; ++i) Record.AddDeclRef(TL.getParam(i)); @@ -681,6 +692,11 @@ void TypeLocWriter::VisitAutoTypeLoc(AutoTypeLoc TL) { Record.AddSourceLocation(TL.getNameLoc()); } +void TypeLocWriter::VisitDeducedTemplateSpecializationTypeLoc( + DeducedTemplateSpecializationTypeLoc TL) { + 
Record.AddSourceLocation(TL.getTemplateNameLoc()); +} + void TypeLocWriter::VisitRecordTypeLoc(RecordTypeLoc TL) { Record.AddSourceLocation(TL.getNameLoc()); } @@ -800,17 +816,17 @@ void TypeLocWriter::VisitPipeTypeLoc(PipeTypeLoc TL) { void ASTWriter::WriteTypeAbbrevs() { using namespace llvm; - BitCodeAbbrev *Abv; + std::shared_ptr Abv; // Abbreviation for TYPE_EXT_QUAL - Abv = new BitCodeAbbrev(); + Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::TYPE_EXT_QUAL)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 3)); // Quals - TypeExtQualAbbrev = Stream.EmitAbbrev(Abv); + TypeExtQualAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for TYPE_FUNCTION_PROTO - Abv = new BitCodeAbbrev(); + Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::TYPE_FUNCTION_PROTO)); // FunctionType Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // ReturnType @@ -828,7 +844,7 @@ void ASTWriter::WriteTypeAbbrevs() { Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // NumParams Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Params - TypeFunctionProtoAbbrev = Stream.EmitAbbrev(Abv); + TypeFunctionProtoAbbrev = Stream.EmitAbbrev(std::move(Abv)); } //===----------------------------------------------------------------------===// @@ -1028,6 +1044,7 @@ void ASTWriter::WriteBlockInfoBlock() { RECORD(IDENTIFIER_OFFSET); RECORD(IDENTIFIER_TABLE); RECORD(EAGERLY_DESERIALIZED_DECLS); + RECORD(MODULAR_CODEGEN_DECLS); RECORD(SPECIAL_TYPES); RECORD(STATISTICS); RECORD(TENTATIVE_DEFINITIONS); @@ -1053,6 +1070,8 @@ void ASTWriter::WriteBlockInfoBlock() { RECORD(HEADER_SEARCH_TABLE); RECORD(FP_PRAGMA_OPTIONS); RECORD(OPENCL_EXTENSIONS); + RECORD(OPENCL_EXTENSION_TYPES); + RECORD(OPENCL_EXTENSION_DECLS); RECORD(DELEGATING_CTORS); RECORD(KNOWN_NAMESPACES); RECORD(MODULE_OFFSET_MAP); @@ -1321,7 +1340,7 @@ uint64_t ASTWriter::WriteControlBlock(Preprocessor &PP, RecordData Record; // Metadata - auto *MetadataAbbrev = new BitCodeAbbrev(); + auto MetadataAbbrev = std::make_shared(); MetadataAbbrev->Add(BitCodeAbbrevOp(METADATA)); MetadataAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 16)); // Major MetadataAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 16)); // Minor @@ -1331,7 +1350,7 @@ uint64_t ASTWriter::WriteControlBlock(Preprocessor &PP, MetadataAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Timestamps MetadataAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Errors MetadataAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // SVN branch/tag - unsigned MetadataAbbrevCode = Stream.EmitAbbrev(MetadataAbbrev); + unsigned MetadataAbbrevCode = Stream.EmitAbbrev(std::move(MetadataAbbrev)); assert((!WritingModule || isysroot.empty()) && "writing module as a relocatable PCH?"); { @@ -1354,10 +1373,10 @@ uint64_t ASTWriter::WriteControlBlock(Preprocessor &PP, } // Module name - auto *Abbrev = new BitCodeAbbrev(); + auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(MODULE_NAME)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name - unsigned AbbrevCode = Stream.EmitAbbrev(Abbrev); + unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev)); RecordData::value_type Record[] = {MODULE_NAME}; Stream.EmitRecordWithBlob(AbbrevCode, Record, WritingModule->Name); } @@ -1374,10 +1393,10 @@ uint64_t ASTWriter::WriteControlBlock(Preprocessor &PP, .ModuleMapFileHomeIsCwd || WritingModule->Directory->getName() != StringRef(".")) { // 
Module directory. - auto *Abbrev = new BitCodeAbbrev(); + auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(MODULE_DIRECTORY)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Directory - unsigned AbbrevCode = Stream.EmitAbbrev(Abbrev); + unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev)); RecordData::value_type Record[] = {MODULE_DIRECTORY}; Stream.EmitRecordWithBlob(AbbrevCode, Record, BaseDir); @@ -1417,17 +1436,17 @@ uint64_t ASTWriter::WriteControlBlock(Preprocessor &PP, serialization::ModuleManager &Mgr = Chain->getModuleManager(); Record.clear(); - for (auto *M : Mgr) { + for (ModuleFile &M : Mgr) { // Skip modules that weren't directly imported. - if (!M->isDirectlyImported()) + if (!M.isDirectlyImported()) continue; - Record.push_back((unsigned)M->Kind); // FIXME: Stable encoding - AddSourceLocation(M->ImportLoc, Record); - Record.push_back(M->File->getSize()); - Record.push_back(getTimestampForOutput(M->File)); - Record.push_back(M->Signature); - AddPath(M->FileName, Record); + Record.push_back((unsigned)M.Kind); // FIXME: Stable encoding + AddSourceLocation(M.ImportLoc, Record); + Record.push_back(M.File->getSize()); + Record.push_back(getTimestampForOutput(M.File)); + Record.push_back(M.Signature); + AddPath(M.FileName, Record); } Stream.EmitRecord(IMPORTS, Record); } @@ -1584,11 +1603,11 @@ uint64_t ASTWriter::WriteControlBlock(Preprocessor &PP, // Original file name and file ID SourceManager &SM = Context.getSourceManager(); if (const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID())) { - auto *FileAbbrev = new BitCodeAbbrev(); + auto FileAbbrev = std::make_shared(); FileAbbrev->Add(BitCodeAbbrevOp(ORIGINAL_FILE)); FileAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // File ID FileAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // File name - unsigned FileAbbrevCode = Stream.EmitAbbrev(FileAbbrev); + unsigned FileAbbrevCode = Stream.EmitAbbrev(std::move(FileAbbrev)); Record.clear(); Record.push_back(ORIGINAL_FILE); @@ -1602,10 +1621,10 @@ uint64_t ASTWriter::WriteControlBlock(Preprocessor &PP, // Original PCH directory if (!OutputFile.empty() && OutputFile != "-") { - auto *Abbrev = new BitCodeAbbrev(); + auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(ORIGINAL_PCH_DIR)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // File name - unsigned AbbrevCode = Stream.EmitAbbrev(Abbrev); + unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev)); SmallString<128> OutputPath(OutputFile); @@ -1642,7 +1661,7 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr, Stream.EnterSubblock(INPUT_FILES_BLOCK_ID, 4); // Create input-file abbreviation. - auto *IFAbbrev = new BitCodeAbbrev(); + auto IFAbbrev = std::make_shared(); IFAbbrev->Add(BitCodeAbbrevOp(INPUT_FILE)); IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // ID IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 12)); // Size @@ -1650,7 +1669,7 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr, IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Overridden IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Transient IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // File name - unsigned IFAbbrevCode = Stream.EmitAbbrev(IFAbbrev); + unsigned IFAbbrevCode = Stream.EmitAbbrev(std::move(IFAbbrev)); // Get all ContentCache objects for files, sorted by whether the file is a // system one or not. System files go at the back, users files at the front. 
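[Editor's note: a recurring mechanical change in this writer file, visible above and in every abbreviation below: `new BitCodeAbbrev()` becomes `std::make_shared<BitCodeAbbrev>()`, and ownership is handed to the stream via `EmitAbbrev(std::move(Abbrev))`, matching the BitstreamWriter API of this LLVM revision. The shape of the idiom as a sketch; the record code and fields are placeholders, not a record defined by this patch.]

    #include "llvm/Bitcode/BitCodes.h"
    #include "llvm/Bitcode/BitstreamWriter.h"
    #include <memory>
    #include <utility>

    static unsigned emitExampleAbbrev(llvm::BitstreamWriter &Stream,
                                      unsigned RecordCode) {
      auto Abbrev = std::make_shared<llvm::BitCodeAbbrev>();
      Abbrev->Add(llvm::BitCodeAbbrevOp(RecordCode));
      Abbrev->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6)); // an ID
      Abbrev->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Blob));   // payload
      // After the move the local handle is empty, so accidental reuse of a
      // stale abbreviation (possible with the old raw pointer) is now loud.
      return Stream.EmitAbbrev(std::move(Abbrev));
    }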
@@ -1710,13 +1729,13 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr, Stream.ExitBlock(); // Create input file offsets abbreviation. - auto *OffsetsAbbrev = new BitCodeAbbrev(); + auto OffsetsAbbrev = std::make_shared(); OffsetsAbbrev->Add(BitCodeAbbrevOp(INPUT_FILE_OFFSETS)); OffsetsAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // # input files OffsetsAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // # non-system // input files OffsetsAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Array - unsigned OffsetsAbbrevCode = Stream.EmitAbbrev(OffsetsAbbrev); + unsigned OffsetsAbbrevCode = Stream.EmitAbbrev(std::move(OffsetsAbbrev)); // Write input file offsets. RecordData::value_type Record[] = {INPUT_FILE_OFFSETS, @@ -1733,7 +1752,7 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr, static unsigned CreateSLocFileAbbrev(llvm::BitstreamWriter &Stream) { using namespace llvm; - auto *Abbrev = new BitCodeAbbrev(); + auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SM_SLOC_FILE_ENTRY)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Offset Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Include location @@ -1744,7 +1763,7 @@ static unsigned CreateSLocFileAbbrev(llvm::BitstreamWriter &Stream) { Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // NumCreatedFIDs Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 24)); // FirstDeclIndex Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // NumDecls - return Stream.EmitAbbrev(Abbrev); + return Stream.EmitAbbrev(std::move(Abbrev)); } /// \brief Create an abbreviation for the SLocEntry that refers to a @@ -1752,14 +1771,14 @@ static unsigned CreateSLocFileAbbrev(llvm::BitstreamWriter &Stream) { static unsigned CreateSLocBufferAbbrev(llvm::BitstreamWriter &Stream) { using namespace llvm; - auto *Abbrev = new BitCodeAbbrev(); + auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SM_SLOC_BUFFER_ENTRY)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Offset Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Include location Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 2)); // Characteristic Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Line directives Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Buffer name blob - return Stream.EmitAbbrev(Abbrev); + return Stream.EmitAbbrev(std::move(Abbrev)); } /// \brief Create an abbreviation for the SLocEntry that refers to a @@ -1768,13 +1787,13 @@ static unsigned CreateSLocBufferBlobAbbrev(llvm::BitstreamWriter &Stream, bool Compressed) { using namespace llvm; - auto *Abbrev = new BitCodeAbbrev(); + auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(Compressed ? 
SM_SLOC_BUFFER_BLOB_COMPRESSED : SM_SLOC_BUFFER_BLOB)); if (Compressed) Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Uncompressed size Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Blob - return Stream.EmitAbbrev(Abbrev); + return Stream.EmitAbbrev(std::move(Abbrev)); } /// \brief Create an abbreviation for the SLocEntry that refers to a macro @@ -1782,14 +1801,14 @@ static unsigned CreateSLocBufferBlobAbbrev(llvm::BitstreamWriter &Stream, static unsigned CreateSLocExpansionAbbrev(llvm::BitstreamWriter &Stream) { using namespace llvm; - auto *Abbrev = new BitCodeAbbrev(); + auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SM_SLOC_EXPANSION_ENTRY)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Offset Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Spelling location Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Start location Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // End location Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Token length - return Stream.EmitAbbrev(Abbrev); + return Stream.EmitAbbrev(std::move(Abbrev)); } namespace { @@ -1964,13 +1983,13 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) { // Create a blob abbreviation using namespace llvm; - auto *Abbrev = new BitCodeAbbrev(); + auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(HEADER_SEARCH_TABLE)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); - unsigned TableAbbrev = Stream.EmitAbbrev(Abbrev); + unsigned TableAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); // Write the header search table RecordData::value_type Record[] = {HEADER_SEARCH_TABLE, BucketOffset, @@ -1983,6 +2002,30 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) { free(const_cast(SavedStrings[I])); } +static void emitBlob(llvm::BitstreamWriter &Stream, StringRef Blob, + unsigned SLocBufferBlobCompressedAbbrv, + unsigned SLocBufferBlobAbbrv) { + typedef ASTWriter::RecordData::value_type RecordDataType; + + // Compress the buffer if possible. We expect that almost all PCM + // consumers will not want its contents. + SmallString<0> CompressedBuffer; + if (llvm::zlib::isAvailable()) { + llvm::Error E = llvm::zlib::compress(Blob.drop_back(1), CompressedBuffer); + if (!E) { + RecordDataType Record[] = {SM_SLOC_BUFFER_BLOB_COMPRESSED, + Blob.size() - 1}; + Stream.EmitRecordWithBlob(SLocBufferBlobCompressedAbbrv, Record, + CompressedBuffer); + return; + } + llvm::consumeError(std::move(E)); + } + + RecordDataType Record[] = {SM_SLOC_BUFFER_BLOB}; + Stream.EmitRecordWithBlob(SLocBufferBlobAbbrv, Record, Blob); +} + /// \brief Writes the block containing the serialized form of the /// source manager. /// @@ -2091,20 +2134,8 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr, const llvm::MemoryBuffer *Buffer = Content->getBuffer(PP.getDiagnostics(), PP.getSourceManager()); StringRef Blob(Buffer->getBufferStart(), Buffer->getBufferSize() + 1); - - // Compress the buffer if possible. We expect that almost all PCM - // consumers will not want its contents. 
- SmallString<0> CompressedBuffer; - if (llvm::zlib::compress(Blob.drop_back(1), CompressedBuffer) == - llvm::zlib::StatusOK) { - RecordData::value_type Record[] = {SM_SLOC_BUFFER_BLOB_COMPRESSED, - Blob.size() - 1}; - Stream.EmitRecordWithBlob(SLocBufferBlobCompressedAbbrv, Record, - CompressedBuffer); - } else { - RecordData::value_type Record[] = {SM_SLOC_BUFFER_BLOB}; - Stream.EmitRecordWithBlob(SLocBufferBlobAbbrv, Record, Blob); - } + emitBlob(Stream, Blob, SLocBufferBlobCompressedAbbrv, + SLocBufferBlobAbbrv); } } else { // The source location entry is a macro expansion. @@ -2134,12 +2165,12 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr, // table is used for lazily loading source-location information. using namespace llvm; - auto *Abbrev = new BitCodeAbbrev(); + auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SOURCE_LOCATION_OFFSETS)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 16)); // # of slocs Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 16)); // total size Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // offsets - unsigned SLocOffsetsAbbrev = Stream.EmitAbbrev(Abbrev); + unsigned SLocOffsetsAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); { RecordData::value_type Record[] = { SOURCE_LOCATION_OFFSETS, SLocEntryOffsets.size(), @@ -2389,13 +2420,13 @@ void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) { // Write the offsets table for macro IDs. using namespace llvm; - auto *Abbrev = new BitCodeAbbrev(); + auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(MACRO_OFFSET)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // # of macros Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // first ID Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); - unsigned MacroOffsetAbbrev = Stream.EmitAbbrev(Abbrev); + unsigned MacroOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); { RecordData::value_type Record[] = {MACRO_OFFSET, MacroOffsets.size(), FirstMacroID - NUM_PREDEF_MACRO_IDS}; @@ -2419,14 +2450,14 @@ void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec) { // Set up the abbreviation for unsigned InclusionAbbrev = 0; { - auto *Abbrev = new BitCodeAbbrev(); + auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(PPD_INCLUSION_DIRECTIVE)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // filename length Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // in quotes Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 2)); // kind Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // imported module Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); - InclusionAbbrev = Stream.EmitAbbrev(Abbrev); + InclusionAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); } unsigned FirstPreprocessorEntityID @@ -2489,11 +2520,11 @@ void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec) { // Write the offsets table for identifier IDs. 
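[Editor's note: the buffer-compression change above swaps the old status-code API (`zlib::StatusOK`) for the `llvm::Error`-based one and hoists the logic into `emitBlob`: attempt compression, emit the compressed record on success, otherwise consume the error and fall back to the raw blob. A sketch of the same fallback under this revision's `llvm::zlib` signatures; the helper name is hypothetical.]

    #include "llvm/ADT/SmallString.h"
    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/Compression.h"
    #include "llvm/Support/Error.h"
    #include <utility>

    // Returns the compressed bytes when zlib is available and succeeds,
    // otherwise the original blob; Storage must outlive the returned ref.
    static llvm::StringRef maybeCompress(llvm::StringRef Blob,
                                         llvm::SmallString<0> &Storage) {
      if (!llvm::zlib::isAvailable())
        return Blob;
      if (llvm::Error E = llvm::zlib::compress(Blob, Storage)) {
        llvm::consumeError(std::move(E)); // compression failed; keep raw bytes
        return Blob;
      }
      return Storage.str();
    }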
using namespace llvm; - auto *Abbrev = new BitCodeAbbrev(); + auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(PPD_ENTITIES_OFFSETS)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // first pp entity Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); - unsigned PPEOffsetAbbrev = Stream.EmitAbbrev(Abbrev); + unsigned PPEOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); RecordData::value_type Record[] = {PPD_ENTITIES_OFFSETS, FirstPreprocessorEntityID - @@ -2513,7 +2544,8 @@ unsigned ASTWriter::getLocalOrImportedSubmoduleID(Module *Mod) { auto *Top = Mod->getTopLevelModule(); if (Top != WritingModule && - !Top->fullModuleNameIs(StringRef(getLangOpts().CurrentModule))) + (getLangOpts().CompilingPCH || + !Top->fullModuleNameIs(StringRef(getLangOpts().CurrentModule)))) return 0; return SubmoduleIDs[Mod] = NextSubmoduleID++; @@ -2547,7 +2579,7 @@ void ASTWriter::WriteSubmodules(Module *WritingModule) { // Write the abbreviations needed for the submodules block. using namespace llvm; - auto *Abbrev = new BitCodeAbbrev(); + auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_DEFINITION)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // ID Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Parent @@ -2559,71 +2591,72 @@ void ASTWriter::WriteSubmodules(Module *WritingModule) { Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // InferExplicit... Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // InferExportWild... Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ConfigMacrosExh... + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // WithCodegen Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name - unsigned DefinitionAbbrev = Stream.EmitAbbrev(Abbrev); + unsigned DefinitionAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); - Abbrev = new BitCodeAbbrev(); + Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_UMBRELLA_HEADER)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name - unsigned UmbrellaAbbrev = Stream.EmitAbbrev(Abbrev); + unsigned UmbrellaAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); - Abbrev = new BitCodeAbbrev(); + Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_HEADER)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name - unsigned HeaderAbbrev = Stream.EmitAbbrev(Abbrev); + unsigned HeaderAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); - Abbrev = new BitCodeAbbrev(); + Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_TOPHEADER)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name - unsigned TopHeaderAbbrev = Stream.EmitAbbrev(Abbrev); + unsigned TopHeaderAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); - Abbrev = new BitCodeAbbrev(); + Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_UMBRELLA_DIR)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name - unsigned UmbrellaDirAbbrev = Stream.EmitAbbrev(Abbrev); + unsigned UmbrellaDirAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); - Abbrev = new BitCodeAbbrev(); + Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_REQUIRES)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // State Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Feature - unsigned RequiresAbbrev = Stream.EmitAbbrev(Abbrev); + unsigned RequiresAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); - Abbrev = new BitCodeAbbrev(); + Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_EXCLUDED_HEADER)); 
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name - unsigned ExcludedHeaderAbbrev = Stream.EmitAbbrev(Abbrev); + unsigned ExcludedHeaderAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); - Abbrev = new BitCodeAbbrev(); + Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_TEXTUAL_HEADER)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name - unsigned TextualHeaderAbbrev = Stream.EmitAbbrev(Abbrev); + unsigned TextualHeaderAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); - Abbrev = new BitCodeAbbrev(); + Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_PRIVATE_HEADER)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name - unsigned PrivateHeaderAbbrev = Stream.EmitAbbrev(Abbrev); + unsigned PrivateHeaderAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); - Abbrev = new BitCodeAbbrev(); + Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_PRIVATE_TEXTUAL_HEADER)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name - unsigned PrivateTextualHeaderAbbrev = Stream.EmitAbbrev(Abbrev); + unsigned PrivateTextualHeaderAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); - Abbrev = new BitCodeAbbrev(); + Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_LINK_LIBRARY)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsFramework Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name - unsigned LinkLibraryAbbrev = Stream.EmitAbbrev(Abbrev); + unsigned LinkLibraryAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); - Abbrev = new BitCodeAbbrev(); + Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_CONFIG_MACRO)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Macro name - unsigned ConfigMacroAbbrev = Stream.EmitAbbrev(Abbrev); + unsigned ConfigMacroAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); - Abbrev = new BitCodeAbbrev(); + Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_CONFLICT)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Other module Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Message - unsigned ConflictAbbrev = Stream.EmitAbbrev(Abbrev); + unsigned ConflictAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); // Write the submodule metadata block. RecordData::value_type Record[] = {getNumberOfModules(WritingModule), @@ -2647,11 +2680,18 @@ void ASTWriter::WriteSubmodules(Module *WritingModule) { // Emit the definition of the block. { - RecordData::value_type Record[] = { - SUBMODULE_DEFINITION, ID, ParentID, Mod->IsFramework, Mod->IsExplicit, - Mod->IsSystem, Mod->IsExternC, Mod->InferSubmodules, - Mod->InferExplicitSubmodules, Mod->InferExportWildcard, - Mod->ConfigMacrosExhaustive}; + RecordData::value_type Record[] = {SUBMODULE_DEFINITION, + ID, + ParentID, + Mod->IsFramework, + Mod->IsExplicit, + Mod->IsSystem, + Mod->IsExternC, + Mod->InferSubmodules, + Mod->InferExplicitSubmodules, + Mod->InferExportWildcard, + Mod->ConfigMacrosExhaustive, + Context->getLangOpts().ModularCodegen && WritingModule}; Stream.EmitRecordWithBlob(DefinitionAbbrev, Record, Mod->Name); } @@ -2786,38 +2826,43 @@ ASTWriter::inferSubmoduleIDFromLocation(SourceLocation Loc) { void ASTWriter::WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag, bool isModule) { - // Make sure set diagnostic pragmas don't affect the translation unit that - // imports the module. - // FIXME: Make diagnostic pragma sections work properly with modules. 
@@ -2786,38 +2826,43 @@ ASTWriter::inferSubmoduleIDFromLocation(SourceLocation Loc) {
 void ASTWriter::WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag,
                                               bool isModule) {
-  // Make sure set diagnostic pragmas don't affect the translation unit that
-  // imports the module.
-  // FIXME: Make diagnostic pragma sections work properly with modules.
-  if (isModule)
-    return;
-
   llvm::SmallDenseMap<const DiagnosticsEngine::DiagState *, unsigned, 64>
       DiagStateIDMap;
   unsigned CurrID = 0;
-  DiagStateIDMap[&Diag.DiagStates.front()] = ++CurrID; // the command-line one.
   RecordData Record;
-  for (DiagnosticsEngine::DiagStatePointsTy::const_iterator
-         I = Diag.DiagStatePoints.begin(), E = Diag.DiagStatePoints.end();
-       I != E; ++I) {
-    const DiagnosticsEngine::DiagStatePoint &point = *I;
-    if (point.Loc.isInvalid())
-      continue;
-    AddSourceLocation(point.Loc, Record);
-    unsigned &DiagStateID = DiagStateIDMap[point.State];
+  auto AddDiagState = [&](const DiagnosticsEngine::DiagState *State,
+                          bool IncludeNonPragmaStates) {
+    unsigned &DiagStateID = DiagStateIDMap[State];
     Record.push_back(DiagStateID);
-
+
     if (DiagStateID == 0) {
       DiagStateID = ++CurrID;
-      for (const auto &I : *(point.State)) {
-        if (I.second.isPragma()) {
+      for (const auto &I : *State) {
+        if (I.second.isPragma() || IncludeNonPragmaStates) {
           Record.push_back(I.first);
           Record.push_back((unsigned)I.second.getSeverity());
         }
       }
-      Record.push_back(-1); // mark the end of the diag/map pairs for this
-                            // location.
+      // Add a sentinel to mark the end of the diag IDs.
+      Record.push_back(unsigned(-1));
+    }
+  };
+
+  AddDiagState(Diag.DiagStatesByLoc.FirstDiagState, isModule);
+  AddSourceLocation(Diag.DiagStatesByLoc.CurDiagStateLoc, Record);
+  AddDiagState(Diag.DiagStatesByLoc.CurDiagState, false);
+
+  for (auto &FileIDAndFile : Diag.DiagStatesByLoc.Files) {
+    if (!FileIDAndFile.first.isValid() ||
+        !FileIDAndFile.second.HasLocalTransitions)
+      continue;
+    AddSourceLocation(Diag.SourceMgr->getLocForStartOfFile(FileIDAndFile.first),
+                      Record);
+    Record.push_back(FileIDAndFile.second.StateTransitions.size());
+    for (auto &StatePoint : FileIDAndFile.second.StateTransitions) {
+      Record.push_back(StatePoint.Offset);
+      AddDiagState(StatePoint.State, false);
     }
   }
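Editor's note: the rewrite above replaces the old per-pragma-location dump with a memoized state table. Each DiagState is pushed as an ID; the first time an ID is seen (pushed as 0), its (diagnostic ID, severity) pairs follow inline, terminated by an unsigned(-1) sentinel, and later mentions are bare back-references. A self-contained model of that encoding (illustrative names; the real logic is the AddDiagState lambda above):

    #include <cstdint>
    #include <map>
    #include <vector>

    using State = std::map<unsigned, unsigned>; // diagnostic ID -> severity

    void addDiagState(std::vector<uint64_t> &Record,
                      std::map<const State *, unsigned> &IDs, unsigned &CurrID,
                      const State *S) {
      unsigned &ID = IDs[S];
      Record.push_back(ID); // 0 here means "new state, table follows inline"
      if (ID == 0) {
        ID = ++CurrID;
        for (const auto &KV : *S) {
          Record.push_back(KV.first);  // diagnostic ID
          Record.push_back(KV.second); // severity
        }
        Record.push_back(unsigned(-1)); // sentinel ends the pair list
      }
    }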
@@ -2889,12 +2934,12 @@ void ASTWriter::WriteTypeDeclOffsets() {
   using namespace llvm;

   // Write the type offsets array
-  auto *Abbrev = new BitCodeAbbrev();
+  auto Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(TYPE_OFFSET));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // # of types
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // base type index
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // types block
-  unsigned TypeOffsetAbbrev = Stream.EmitAbbrev(Abbrev);
+  unsigned TypeOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
   {
     RecordData::value_type Record[] = {TYPE_OFFSET, TypeOffsets.size(),
                                        FirstTypeID - NUM_PREDEF_TYPE_IDS};
@@ -2902,12 +2947,12 @@ void ASTWriter::WriteTypeDeclOffsets() {
   }

   // Write the declaration offsets array
-  Abbrev = new BitCodeAbbrev();
+  Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(DECL_OFFSET));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // # of declarations
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // base decl ID
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // declarations block
-  unsigned DeclOffsetAbbrev = Stream.EmitAbbrev(Abbrev);
+  unsigned DeclOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
   {
     RecordData::value_type Record[] = {DECL_OFFSET, DeclOffsets.size(),
                                        FirstDeclID - NUM_PREDEF_DECL_IDS};
@@ -2932,11 +2977,11 @@ void ASTWriter::WriteFileDeclIDsMap() {
     FileGroupedDeclIDs.push_back(LocDeclEntry.second);
   }

-  auto *Abbrev = new BitCodeAbbrev();
+  auto Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(FILE_SORTED_DECLS));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
-  unsigned AbbrevCode = Stream.EmitAbbrev(Abbrev);
+  unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev));
   RecordData::value_type Record[] = {FILE_SORTED_DECLS,
                                      FileGroupedDeclIDs.size()};
   Stream.EmitRecordWithBlob(AbbrevCode, Record, bytes(FileGroupedDeclIDs));
@@ -3140,12 +3185,12 @@ void ASTWriter::WriteSelectors(Sema &SemaRef) {
   }

   // Create a blob abbreviation
-  auto *Abbrev = new BitCodeAbbrev();
+  auto Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(METHOD_POOL));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
-  unsigned MethodPoolAbbrev = Stream.EmitAbbrev(Abbrev);
+  unsigned MethodPoolAbbrev = Stream.EmitAbbrev(std::move(Abbrev));

   // Write the method pool
   {
@@ -3155,12 +3200,12 @@ void ASTWriter::WriteSelectors(Sema &SemaRef) {
   }

   // Create a blob abbreviation for the selector table offsets.
-  Abbrev = new BitCodeAbbrev();
+  Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(SELECTOR_OFFSETS));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // size
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // first ID
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
-  unsigned SelectorOffsetAbbrev = Stream.EmitAbbrev(Abbrev);
+  unsigned SelectorOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev));

   // Write the selector offsets table.
   {
@@ -3450,11 +3495,11 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP,
   }

   // Create a blob abbreviation
-  auto *Abbrev = new BitCodeAbbrev();
+  auto Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(IDENTIFIER_TABLE));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
-  unsigned IDTableAbbrev = Stream.EmitAbbrev(Abbrev);
+  unsigned IDTableAbbrev = Stream.EmitAbbrev(std::move(Abbrev));

   // Write the identifier table
   RecordData::value_type Record[] = {IDENTIFIER_TABLE, BucketOffset};
@@ -3462,12 +3507,12 @@
   }

   // Write the offsets table for identifier IDs.
-  auto *Abbrev = new BitCodeAbbrev();
+  auto Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(IDENTIFIER_OFFSET));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // # of identifiers
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // first ID
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
-  unsigned IdentifierOffsetAbbrev = Stream.EmitAbbrev(Abbrev);
+  unsigned IdentifierOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev));

 #ifndef NDEBUG
   for (unsigned I = 0, N = IdentifierOffsets.size(); I != N; ++I)
@@ -3556,6 +3601,7 @@ class ASTDeclContextNameLookupTrait {
     case DeclarationName::ObjCOneArgSelector:
     case DeclarationName::ObjCMultiArgSelector:
     case DeclarationName::CXXLiteralOperatorName:
+    case DeclarationName::CXXDeductionGuideName:
       KeyLen += 4;
       break;
     case DeclarationName::CXXOperatorName:
@@ -3585,6 +3631,7 @@ class ASTDeclContextNameLookupTrait {
     switch (Name.getKind()) {
     case DeclarationName::Identifier:
     case DeclarationName::CXXLiteralOperatorName:
+    case DeclarationName::CXXDeductionGuideName:
       LE.write<uint32_t>(Writer.getIdentifierRef(Name.getIdentifier()));
       return;
     case DeclarationName::ObjCZeroArgSelector:
@@ -3939,11 +3986,46 @@ void ASTWriter::WriteOpenCLExtensions(Sema &SemaRef) {
   const OpenCLOptions &Opts = SemaRef.getOpenCLOptions();
   RecordData Record;
-#define OPENCLEXT(nm)  Record.push_back(Opts.nm);
-#include "clang/Basic/OpenCLExtensions.def"
+  for (const auto &I : Opts.OptMap) {
+    AddString(I.getKey(), Record);
+    auto V = I.getValue();
+    Record.push_back(V.Supported ? 1 : 0);
+    Record.push_back(V.Enabled ? 1 : 0);
+    Record.push_back(V.Avail);
+    Record.push_back(V.Core);
+  }
   Stream.EmitRecord(OPENCL_EXTENSIONS, Record);
 }

+void ASTWriter::WriteOpenCLExtensionTypes(Sema &SemaRef) {
+  if (!SemaRef.Context.getLangOpts().OpenCL)
+    return;
+
+  RecordData Record;
+  for (const auto &I : SemaRef.OpenCLTypeExtMap) {
+    Record.push_back(
+        static_cast<unsigned>(getTypeID(I.first->getCanonicalTypeInternal())));
+    Record.push_back(I.second.size());
+    for (auto Ext : I.second)
+      AddString(Ext, Record);
+  }
+  Stream.EmitRecord(OPENCL_EXTENSION_TYPES, Record);
+}
+
+void ASTWriter::WriteOpenCLExtensionDecls(Sema &SemaRef) {
+  if (!SemaRef.Context.getLangOpts().OpenCL)
+    return;
+
+  RecordData Record;
+  for (const auto &I : SemaRef.OpenCLDeclExtMap) {
+    Record.push_back(getDeclID(I.first));
+    Record.push_back(static_cast<unsigned>(I.second.size()));
+    for (auto Ext : I.second)
+      AddString(Ext, Record);
+  }
+  Stream.EmitRecord(OPENCL_EXTENSION_DECLS, Record);
+}
+
 void ASTWriter::WriteCUDAPragmas(Sema &SemaRef) {
   if (SemaRef.ForceCUDAHostDeviceDepth > 0) {
     RecordData::value_type Record[] = {SemaRef.ForceCUDAHostDeviceDepth};
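Editor's note: the three OpenCL writers above share one flat layout. OPENCL_EXTENSIONS stores, per extension, the name followed by the Supported/Enabled/Avail/Core fields; the TYPES and DECLS records store an ID, a count, then that many extension names. A standalone model of the per-extension entry (illustrative; that AddString emits the length first and then one element per character is the assumption here, and Avail/Core are presumably OpenCL version numbers):

    #include <cstdint>
    #include <string>
    #include <vector>

    struct OpenCLExt { bool Supported, Enabled; unsigned Avail, Core; };

    void emitExtension(std::vector<uint64_t> &Record, const std::string &Name,
                       const OpenCLExt &V) {
      Record.push_back(Name.size()); // AddString: length first...
      for (unsigned char C : Name)
        Record.push_back(C);         // ...then one element per character
      Record.push_back(V.Supported ? 1 : 0);
      Record.push_back(V.Enabled ? 1 : 0);
      Record.push_back(V.Avail);     // version the extension became available
      Record.push_back(V.Core);      // version the extension became core
    }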
@@ -3988,11 +4070,11 @@ void ASTWriter::WriteObjCCategories() {
   // Emit the categories map.
   using namespace llvm;

-  auto *Abbrev = new BitCodeAbbrev();
+  auto Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(OBJC_CATEGORIES_MAP));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // # of entries
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
-  unsigned AbbrevID = Stream.EmitAbbrev(Abbrev);
+  unsigned AbbrevID = Stream.EmitAbbrev(std::move(Abbrev));

   RecordData::value_type Record[] = {OBJC_CATEGORIES_MAP, CategoriesMap.size()};
   Stream.EmitRecordWithBlob(AbbrevID, Record,
@@ -4054,14 +4136,14 @@ void ASTWriter::WriteModuleFileExtension(Sema &SemaRef,
   Stream.EnterSubblock(EXTENSION_BLOCK_ID, 4);

   // Emit the metadata record abbreviation.
-  auto *Abv = new llvm::BitCodeAbbrev();
+  auto Abv = std::make_shared<llvm::BitCodeAbbrev>();
   Abv->Add(llvm::BitCodeAbbrevOp(EXTENSION_METADATA));
   Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6));
   Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6));
   Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6));
   Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6));
   Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Blob));
-  unsigned Abbrev = Stream.EmitAbbrev(Abv);
+  unsigned Abbrev = Stream.EmitAbbrev(std::move(Abv));

   // Emit the metadata record.
   RecordData Record;
@@ -4184,29 +4266,10 @@ void ASTWriter::SetSelectorOffset(Selector Sel, uint32_t Offset) {
   SelectorOffsets[ID - FirstSelectorID] = Offset;
 }

-ASTWriter::ASTWriter(
-    llvm::BitstreamWriter &Stream,
-    ArrayRef<std::shared_ptr<ModuleFileExtension>> Extensions,
-    bool IncludeTimestamps)
-    : Stream(Stream), Context(nullptr), PP(nullptr), Chain(nullptr),
-      WritingModule(nullptr), IncludeTimestamps(IncludeTimestamps),
-      WritingAST(false), DoneWritingDeclsAndTypes(false),
-      ASTHasCompilerErrors(false), FirstDeclID(NUM_PREDEF_DECL_IDS),
-      NextDeclID(FirstDeclID), FirstTypeID(NUM_PREDEF_TYPE_IDS),
-      NextTypeID(FirstTypeID), FirstIdentID(NUM_PREDEF_IDENT_IDS),
-      NextIdentID(FirstIdentID), FirstMacroID(NUM_PREDEF_MACRO_IDS),
-      NextMacroID(FirstMacroID), FirstSubmoduleID(NUM_PREDEF_SUBMODULE_IDS),
-      NextSubmoduleID(FirstSubmoduleID),
-      FirstSelectorID(NUM_PREDEF_SELECTOR_IDS), NextSelectorID(FirstSelectorID),
-      NumStatements(0), NumMacros(0),
-      NumLexicalDeclContexts(0), NumVisibleDeclContexts(0),
-      TypeExtQualAbbrev(0), TypeFunctionProtoAbbrev(0), DeclParmVarAbbrev(0),
-      DeclContextLexicalAbbrev(0), DeclContextVisibleLookupAbbrev(0),
-      UpdateVisibleAbbrev(0), DeclRecordAbbrev(0), DeclTypedefAbbrev(0),
-      DeclVarAbbrev(0), DeclFieldAbbrev(0), DeclEnumAbbrev(0),
-      DeclObjCIvarAbbrev(0), DeclCXXMethodAbbrev(0), DeclRefExprAbbrev(0),
-      CharacterLiteralAbbrev(0), IntegerLiteralAbbrev(0),
-      ExprImplicitCastAbbrev(0) {
+ASTWriter::ASTWriter(llvm::BitstreamWriter &Stream,
+                     ArrayRef<std::shared_ptr<ModuleFileExtension>> Extensions,
+                     bool IncludeTimestamps)
+    : Stream(Stream), IncludeTimestamps(IncludeTimestamps) {
   for (const auto &Ext : Extensions) {
     if (auto Writer = Ext->createExtensionWriter(*this))
       ModuleFileExtensionWriters.push_back(std::move(Writer));
@@ -4439,10 +4502,10 @@ uint64_t ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
     }
   }

-  auto *Abv = new llvm::BitCodeAbbrev();
+  auto Abv = std::make_shared<llvm::BitCodeAbbrev>();
   Abv->Add(llvm::BitCodeAbbrevOp(TU_UPDATE_LEXICAL));
   Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Blob));
-  unsigned TuUpdateLexicalAbbrev = Stream.EmitAbbrev(Abv);
+  unsigned TuUpdateLexicalAbbrev = Stream.EmitAbbrev(std::move(Abv));
   {
     RecordData::value_type Record[] = {TU_UPDATE_LEXICAL};
     Stream.EmitRecordWithBlob(TuUpdateLexicalAbbrev, Record,
@@ -4450,11 +4513,11 @@ uint64_t ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
   }

   // And a visible updates block for the translation unit.
-  Abv = new llvm::BitCodeAbbrev();
+  Abv = std::make_shared<llvm::BitCodeAbbrev>();
   Abv->Add(llvm::BitCodeAbbrevOp(UPDATE_VISIBLE));
   Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6));
   Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Blob));
-  UpdateVisibleAbbrev = Stream.EmitAbbrev(Abv);
+  UpdateVisibleAbbrev = Stream.EmitAbbrev(std::move(Abv));
   WriteDeclContextVisibleUpdate(TU);

   // If we have any extern "C" names, write out a visible update for them.
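Editor's note: the constructor hunk above deletes the long mem-initializer list without moving it anywhere in this file, which only type-checks if those members picked up in-class default initializers in ASTWriter.h (not part of this diff). A hypothetical, simplified sketch of the presumed shape; names mirror the deleted initializers and the values here are placeholders:

    // Defaults move from the constructor's mem-initializer list to the member
    // declarations (NSDMIs), so the constructor only initializes what varies
    // per instance.
    class WriterSketch {
      static constexpr unsigned NumPredefDeclIDs = 1; // placeholder constant
      unsigned FirstDeclID = NumPredefDeclIDs;
      unsigned NextDeclID = FirstDeclID;  // NSDMIs may reference earlier members
      unsigned DeclParmVarAbbrev = 0;     // abbreviation codes default to 0
      bool WritingAST = false;
      bool IncludeTimestamps;
    public:
      explicit WriterSketch(bool IncludeTimestamps)
          : IncludeTimestamps(IncludeTimestamps) {}
    };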
@@ -4549,17 +4612,17 @@ uint64_t ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
   //     c++-base-specifiers-id:i32
   //     type-id:i32)
   //
-  auto *Abbrev = new BitCodeAbbrev();
+  auto Abbrev = std::make_shared<BitCodeAbbrev>();
   Abbrev->Add(BitCodeAbbrevOp(MODULE_OFFSET_MAP));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
-  unsigned ModuleOffsetMapAbbrev = Stream.EmitAbbrev(Abbrev);
+  unsigned ModuleOffsetMapAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
   SmallString<2048> Buffer;
   {
     llvm::raw_svector_ostream Out(Buffer);
-    for (ModuleFile *M : Chain->ModuleMgr) {
+    for (ModuleFile &M : Chain->ModuleMgr) {
       using namespace llvm::support;
       endian::Writer<little> LE(Out);
-      StringRef FileName = M->FileName;
+      StringRef FileName = M.FileName;
       LE.write<uint16_t>(FileName.size());
       Out.write(FileName.data(), FileName.size());
@@ -4577,15 +4640,15 @@ uint64_t ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,

       // These values should be unique within a chain, since they will be read
       // as keys into ContinuousRangeMaps.
-      writeBaseIDOrNone(M->SLocEntryBaseOffset, M->LocalNumSLocEntries);
-      writeBaseIDOrNone(M->BaseIdentifierID, M->LocalNumIdentifiers);
-      writeBaseIDOrNone(M->BaseMacroID, M->LocalNumMacros);
-      writeBaseIDOrNone(M->BasePreprocessedEntityID,
-                        M->NumPreprocessedEntities);
-      writeBaseIDOrNone(M->BaseSubmoduleID, M->LocalNumSubmodules);
-      writeBaseIDOrNone(M->BaseSelectorID, M->LocalNumSelectors);
-      writeBaseIDOrNone(M->BaseDeclID, M->LocalNumDecls);
-      writeBaseIDOrNone(M->BaseTypeIndex, M->LocalNumTypes);
+      writeBaseIDOrNone(M.SLocEntryBaseOffset, M.LocalNumSLocEntries);
+      writeBaseIDOrNone(M.BaseIdentifierID, M.LocalNumIdentifiers);
+      writeBaseIDOrNone(M.BaseMacroID, M.LocalNumMacros);
+      writeBaseIDOrNone(M.BasePreprocessedEntityID,
+                        M.NumPreprocessedEntities);
+      writeBaseIDOrNone(M.BaseSubmoduleID, M.LocalNumSubmodules);
+      writeBaseIDOrNone(M.BaseSelectorID, M.LocalNumSelectors);
+      writeBaseIDOrNone(M.BaseDeclID, M.LocalNumDecls);
+      writeBaseIDOrNone(M.BaseTypeIndex, M.LocalNumTypes);
     }
   }
   RecordData::value_type Record[] = {MODULE_OFFSET_MAP};
@@ -4630,23 +4693,14 @@ uint64_t ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
   WriteIdentifierTable(PP, SemaRef.IdResolver, isModule);
   WriteFPPragmaOptions(SemaRef.getFPOptions());
   WriteOpenCLExtensions(SemaRef);
+  WriteOpenCLExtensionTypes(SemaRef);
+  WriteOpenCLExtensionDecls(SemaRef);
   WriteCUDAPragmas(SemaRef);
   WritePragmaDiagnosticMappings(Context.getDiagnostics(), isModule);

   // If we're emitting a module, write out the submodule information.
   if (WritingModule)
     WriteSubmodules(WritingModule);
-  else if (!getLangOpts().CurrentModule.empty()) {
-    // If we're building a PCH in the implementation of a module, we may need
-    // the description of the current module.
-    //
-    // FIXME: We may need other modules that we did not load from an AST file,
-    // such as if a module declares a 'conflicts' on a different module.
-    Module *M = PP.getHeaderSearchInfo().getModuleMap().findModule(
-        getLangOpts().CurrentModule);
-    if (M && !M->IsFromModuleFile)
-      WriteSubmodules(M);
-  }

   Stream.EmitRecord(SPECIAL_TYPES, SpecialTypes);
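Editor's note: two things change in the MODULE_OFFSET_MAP hunk above: ModuleManager iteration now yields ModuleFile& rather than ModuleFile*, and the blob layout itself stays as it was: little-endian fixed-width fields, a uint16 filename length, the raw filename bytes, then the base-ID fields. A standalone model of the byte layout (illustrative; mirrors what llvm::support::endian::Writer<little> produces):

    #include <cstdint>
    #include <string>
    #include <vector>

    // Little-endian u16, as endian::Writer<little>::write<uint16_t> emits.
    void writeLE16(std::vector<uint8_t> &Out, uint16_t V) {
      Out.push_back(uint8_t(V & 0xff)); // low byte first
      Out.push_back(uint8_t(V >> 8));
    }

    void writeModuleEntry(std::vector<uint8_t> &Out,
                          const std::string &FileName) {
      writeLE16(Out, uint16_t(FileName.size()));               // name length
      Out.insert(Out.end(), FileName.begin(), FileName.end()); // name bytes
      // ... followed by one writeBaseIDOrNone field per ID space, as above ...
    }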
@@ -4654,6 +4708,9 @@
   if (!EagerlyDeserializedDecls.empty())
     Stream.EmitRecord(EAGERLY_DESERIALIZED_DECLS, EagerlyDeserializedDecls);

+  if (Context.getLangOpts().ModularCodegen)
+    Stream.EmitRecord(MODULAR_CODEGEN_DECLS, ModularCodegenDecls);
+
   // Write the record containing tentative definitions.
   if (!TentativeDefinitions.empty())
     Stream.EmitRecord(TENTATIVE_DEFINITIONS, TentativeDefinitions);
@@ -5221,6 +5278,10 @@ void ASTRecordWriter::AddDeclarationName(DeclarationName Name) {
     AddTypeRef(Name.getCXXNameType());
     break;

+  case DeclarationName::CXXDeductionGuideName:
+    AddDeclRef(Name.getCXXDeductionGuideTemplate());
+    break;
+
   case DeclarationName::CXXOperatorName:
     Record->push_back(Name.getCXXOverloadedOperator());
     break;
@@ -5282,6 +5343,7 @@ void ASTRecordWriter::AddDeclarationNameLoc(const DeclarationNameLoc &DNLoc,
   case DeclarationName::ObjCOneArgSelector:
   case DeclarationName::ObjCMultiArgSelector:
   case DeclarationName::CXXUsingDirective:
+  case DeclarationName::CXXDeductionGuideName:
     break;
   }
 }
@@ -5643,10 +5705,12 @@ void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) {
   Record->push_back(Data.ComputedVisibleConversions);
   Record->push_back(Data.UserProvidedDefaultConstructor);
   Record->push_back(Data.DeclaredSpecialMembers);
-  Record->push_back(Data.ImplicitCopyConstructorHasConstParam);
+  Record->push_back(Data.ImplicitCopyConstructorCanHaveConstParamForVBase);
+  Record->push_back(Data.ImplicitCopyConstructorCanHaveConstParamForNonVBase);
   Record->push_back(Data.ImplicitCopyAssignmentHasConstParam);
   Record->push_back(Data.HasDeclaredCopyConstructorWithConstParam);
   Record->push_back(Data.HasDeclaredCopyAssignmentWithConstParam);
+  Record->push_back(Data.ODRHash);

   // IsLambda bit is already saved.

   Record->push_back(Data.NumBases);
diff --git a/tools/clang/test/Driver/sanitizer-ld.c b/tools/clang/test/Driver/sanitizer-ld.c index 76af97f..5ba3124 100644 --- a/tools/clang/test/Driver/sanitizer-ld.c +++ b/tools/clang/test/Driver/sanitizer-ld.c @@ -1,7 +1,7 @@ // Test sanitizers ld flags. // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ -// RUN: -target i386-unknown-linux -fsanitize=address \ +// RUN: -target i386-unknown-linux -fuse-ld=ld -fsanitize=address \ // RUN: -resource-dir=%S/Inputs/resource_dir \ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck --check-prefix=CHECK-ASAN-LINUX %s @@ -17,7 +17,7 @@ // CHECK-ASAN-LINUX: "-ldl" // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ -// RUN: -target i386-unknown-linux -fsanitize=address -shared-libasan \ +// RUN: -target i386-unknown-linux -fuse-ld=ld -fsanitize=address -shared-libasan \ // RUN: -resource-dir=%S/Inputs/resource_dir \ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck --check-prefix=CHECK-SHARED-ASAN-LINUX %s @@ -34,7 +34,7 @@ // CHECK-SHARED-ASAN-LINUX-NOT: "--dynamic-list" // RUN: %clang -no-canonical-prefixes %s -### -o %t.so -shared 2>&1 \ -// RUN: -target i386-unknown-linux -fsanitize=address -shared-libasan \ +// RUN: -target i386-unknown-linux -fuse-ld=ld -fsanitize=address -shared-libasan \ // RUN: -resource-dir=%S/Inputs/resource_dir \ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck --check-prefix=CHECK-DSO-SHARED-ASAN-LINUX %s @@ -51,7 +51,7 @@ // CHECK-DSO-SHARED-ASAN-LINUX-NOT: "--dynamic-list" // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ -// RUN: -target i386-unknown-freebsd -fsanitize=address \ +// RUN: -target i386-unknown-freebsd -fuse-ld=ld -fsanitize=address \ // RUN: -resource-dir=%S/Inputs/resource_dir \ // RUN: --sysroot=%S/Inputs/basic_freebsd_tree \ // RUN: | FileCheck --check-prefix=CHECK-ASAN-FREEBSD %s @@ -67,7 +67,7 @@ // CHECK-ASAN-FREEBSD: "-lrt" // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ -// RUN: -target i386-unknown-freebsd -fsanitize=address \ +// RUN: -target 
i386-unknown-freebsd -fuse-ld=ld -fsanitize=address \ // RUN: -resource-dir=%S/Inputs/resource_dir \ // RUN: --sysroot=%S/Inputs/basic_freebsd_tree \ // RUN: | FileCheck --check-prefix=CHECK-ASAN-FREEBSD-LDL %s @@ -76,7 +76,7 @@ // CHECK-ASAN-FREEBSD-LDL-NOT: "-ldl" // RUN: %clangxx -no-canonical-prefixes %s -### -o %t.o 2>&1 \ -// RUN: -target i386-unknown-linux -stdlib=platform -fsanitize=address \ +// RUN: -target i386-unknown-linux -fuse-ld=ld -stdlib=platform -fsanitize=address \ // RUN: -resource-dir=%S/Inputs/empty_resource_dir \ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck --check-prefix=CHECK-ASAN-LINUX-CXX %s @@ -93,7 +93,7 @@ // CHECK-ASAN-LINUX-CXX: "-ldl" // RUN: %clang -no-canonical-prefixes %s -### -o /dev/null -fsanitize=address \ -// RUN: -target i386-unknown-linux -stdlib=platform \ +// RUN: -target i386-unknown-linux -fuse-ld=ld -stdlib=platform \ // RUN: --sysroot=%S/Inputs/basic_linux_tree -lstdc++ -static 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-ASAN-LINUX-CXX-STATIC %s // @@ -103,7 +103,7 @@ // CHECK-ASAN-LINUX-CXX-STATIC: stdc++ // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ -// RUN: -target arm-linux-gnueabi -fsanitize=address \ +// RUN: -target arm-linux-gnueabi -fuse-ld=ld -fsanitize=address \ // RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ // RUN: | FileCheck --check-prefix=CHECK-ASAN-ARM %s // @@ -112,7 +112,7 @@ // CHECK-ASAN-ARM: libclang_rt.asan-arm.a" // // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ -// RUN: -target armv7l-linux-gnueabi -fsanitize=address \ +// RUN: -target armv7l-linux-gnueabi -fuse-ld=ld -fsanitize=address \ // RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ // RUN: | FileCheck --check-prefix=CHECK-ASAN-ARMv7 %s // @@ -121,7 +121,7 @@ // CHECK-ASAN-ARMv7: libclang_rt.asan-arm.a" // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ -// RUN: -target arm-linux-androideabi -fsanitize=address \ +// RUN: -target arm-linux-androideabi -fuse-ld=ld -fsanitize=address \ // RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ // RUN: | FileCheck --check-prefix=CHECK-ASAN-ANDROID %s // @@ -141,7 +141,7 @@ // CHECK-ASAN-ANDROID-SHARED-LIBASAN-NOT: argument unused during compilation: '-shared-libasan' // // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ -// RUN: -target arm-linux-androideabi -fsanitize=address \ +// RUN: -target arm-linux-androideabi -fuse-ld=ld -fsanitize=address \ // RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ // RUN: -shared \ // RUN: | FileCheck --check-prefix=CHECK-ASAN-ANDROID-SHARED %s @@ -152,7 +152,7 @@ // CHECK-ASAN-ANDROID-SHARED-NOT: "-lpthread" // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ -// RUN: -target sparcel-myriad-rtems-elf -fsanitize=address \ +// RUN: -target sparcel-myriad-rtems-elf -fuse-ld=ld -fsanitize=address \ // RUN: --sysroot=%S/Inputs/basic_myriad_tree \ // RUN: | FileCheck --check-prefix=CHECK-ASAN-MYRIAD %s // @@ -161,7 +161,7 @@ // CHECK-ASAN-MYRIAD: libclang_rt.asan-sparcel.a" // RUN: %clangxx -no-canonical-prefixes %s -### -o %t.o 2>&1 \ -// RUN: -target x86_64-unknown-linux -stdlib=platform -lstdc++ \ +// RUN: -target x86_64-unknown-linux -fuse-ld=ld -stdlib=platform -lstdc++ \ // RUN: -fsanitize=thread \ // RUN: -resource-dir=%S/Inputs/resource_dir \ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ @@ -180,7 +180,7 @@ // CHECK-TSAN-LINUX-CXX: "-ldl" // RUN: %clangxx -no-canonical-prefixes %s -### -o %t.o 2>&1 \ -// RUN: -target x86_64-unknown-linux -stdlib=platform -lstdc++ \ +// RUN: 
-target x86_64-unknown-linux -fuse-ld=ld -stdlib=platform -lstdc++ \ // RUN: -fsanitize=memory \ // RUN: -resource-dir=%S/Inputs/resource_dir \ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ @@ -199,7 +199,7 @@ // CHECK-MSAN-LINUX-CXX: "-ldl" // RUN: %clang -fsanitize=undefined %s -### -o %t.o 2>&1 \ -// RUN: -target i386-unknown-linux \ +// RUN: -target i386-unknown-linux -fuse-ld=ld \ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck --check-prefix=CHECK-UBSAN-LINUX %s // CHECK-UBSAN-LINUX: "{{.*}}ld{{(.exe)?}}" @@ -220,7 +220,7 @@ // CHECK-UBSAN-LINUX-LINK-CXX-NOT: "-lstdc++" // RUN: %clangxx -fsanitize=undefined %s -### -o %t.o 2>&1 \ -// RUN: -target i386-unknown-linux -stdlib=platform \ +// RUN: -target i386-unknown-linux -fuse-ld=ld -stdlib=platform \ // RUN: -resource-dir=%S/Inputs/resource_dir \ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck --check-prefix=CHECK-UBSAN-LINUX-CXX %s @@ -235,7 +235,7 @@ // CHECK-UBSAN-LINUX-CXX: "-lpthread" // RUN: %clang -fsanitize=address,undefined %s -### -o %t.o 2>&1 \ -// RUN: -target i386-unknown-linux \ +// RUN: -target i386-unknown-linux -fuse-ld=ld \ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck --check-prefix=CHECK-ASAN-UBSAN-LINUX %s // CHECK-ASAN-UBSAN-LINUX: "{{.*}}ld{{(.exe)?}}" @@ -245,7 +245,7 @@ // CHECK-ASAN-UBSAN-LINUX: "-lpthread" // RUN: %clangxx -fsanitize=address,undefined %s -### -o %t.o 2>&1 \ -// RUN: -target i386-unknown-linux -stdlib=platform \ +// RUN: -target i386-unknown-linux -fuse-ld=ld -stdlib=platform \ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck --check-prefix=CHECK-ASAN-UBSAN-LINUX-CXX %s // CHECK-ASAN-UBSAN-LINUX-CXX: "{{.*}}ld{{(.exe)?}}" @@ -256,7 +256,7 @@ // CHECK-ASAN-UBSAN-LINUX-CXX: "-lpthread" // RUN: %clangxx -fsanitize=memory,undefined %s -### -o %t.o 2>&1 \ -// RUN: -target x86_64-unknown-linux \ +// RUN: -target x86_64-unknown-linux -fuse-ld=ld \ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck --check-prefix=CHECK-MSAN-UBSAN-LINUX-CXX %s // CHECK-MSAN-UBSAN-LINUX-CXX: "{{.*}}ld{{(.exe)?}}" @@ -264,7 +264,7 @@ // CHECK-MSAN-UBSAN-LINUX-CXX-NOT: libclang_rt.ubsan // RUN: %clangxx -fsanitize=thread,undefined %s -### -o %t.o 2>&1 \ -// RUN: -target x86_64-unknown-linux \ +// RUN: -target x86_64-unknown-linux -fuse-ld=ld \ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck --check-prefix=CHECK-TSAN-UBSAN-LINUX-CXX %s // CHECK-TSAN-UBSAN-LINUX-CXX: "{{.*}}ld{{(.exe)?}}" @@ -272,7 +272,7 @@ // CHECK-TSAN-UBSAN-LINUX-CXX-NOT: libclang_rt.ubsan // RUN: %clang -fsanitize=undefined %s -### -o %t.o 2>&1 \ -// RUN: -target i386-unknown-linux \ +// RUN: -target i386-unknown-linux -fuse-ld=ld \ // RUN: -resource-dir=%S/Inputs/resource_dir \ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: -shared \ @@ -283,7 +283,7 @@ // CHECK-UBSAN-LINUX-SHARED-NOT: libclang_rt.ubsan // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ -// RUN: -target x86_64-unknown-linux -fsanitize=leak \ +// RUN: -target x86_64-unknown-linux -fuse-ld=ld -fsanitize=leak \ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck --check-prefix=CHECK-LSAN-LINUX %s // @@ -295,7 +295,7 @@ // CHECK-LSAN-LINUX: "-ldl" // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ -// RUN: -target x86_64-unknown-linux -fsanitize=leak -fsanitize-coverage=func \ +// RUN: -target x86_64-unknown-linux -fuse-ld=ld -fsanitize=leak -fsanitize-coverage=func \ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck 
--check-prefix=CHECK-LSAN-COV-LINUX %s // @@ -308,7 +308,7 @@ // CHECK-LSAN-COV-LINUX: "-ldl" // RUN: %clang -fsanitize=leak,address %s -### -o %t.o 2>&1 \ -// RUN: -target x86_64-unknown-linux \ +// RUN: -target x86_64-unknown-linux -fuse-ld=ld \ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck --check-prefix=CHECK-LSAN-ASAN-LINUX %s // CHECK-LSAN-ASAN-LINUX: "{{.*}}ld{{(.exe)?}}" @@ -317,7 +317,7 @@ // CHECK-LSAN-ASAN-LINUX-NOT: libclang_rt.lsan // RUN: %clang -fsanitize=address -fsanitize-coverage=func %s -### -o %t.o 2>&1 \ -// RUN: -target x86_64-unknown-linux \ +// RUN: -target x86_64-unknown-linux -fuse-ld=ld \ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck --check-prefix=CHECK-ASAN-COV-LINUX %s // CHECK-ASAN-COV-LINUX: "{{.*}}ld{{(.exe)?}}" @@ -327,7 +327,7 @@ // CHECK-ASAN-COV-LINUX: "-lpthread" // RUN: %clang -fsanitize=memory -fsanitize-coverage=func %s -### -o %t.o 2>&1 \ -// RUN: -target x86_64-unknown-linux \ +// RUN: -target x86_64-unknown-linux -fuse-ld=ld \ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck --check-prefix=CHECK-MSAN-COV-LINUX %s // CHECK-MSAN-COV-LINUX: "{{.*}}ld{{(.exe)?}}" @@ -337,7 +337,7 @@ // CHECK-MSAN-COV-LINUX: "-lpthread" // RUN: %clang -fsanitize=dataflow -fsanitize-coverage=func %s -### -o %t.o 2>&1 \ -// RUN: -target x86_64-unknown-linux \ +// RUN: -target x86_64-unknown-linux -fuse-ld=ld \ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck --check-prefix=CHECK-DFSAN-COV-LINUX %s // CHECK-DFSAN-COV-LINUX: "{{.*}}ld{{(.exe)?}}" @@ -347,7 +347,7 @@ // CHECK-DFSAN-COV-LINUX: "-lpthread" // RUN: %clang -fsanitize=undefined -fsanitize-coverage=func %s -### -o %t.o 2>&1 \ -// RUN: -target x86_64-unknown-linux \ +// RUN: -target x86_64-unknown-linux -fuse-ld=ld \ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck --check-prefix=CHECK-UBSAN-COV-LINUX %s // CHECK-UBSAN-COV-LINUX: "{{.*}}ld{{(.exe)?}}" @@ -356,7 +356,7 @@ // CHECK-UBSAN-COV-LINUX: "-lpthread" // RUN: %clang -fsanitize-coverage=func %s -### -o %t.o 2>&1 \ -// RUN: -target x86_64-unknown-linux \ +// RUN: -target x86_64-unknown-linux -fuse-ld=ld \ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck --check-prefix=CHECK-COV-LINUX %s // CHECK-COV-LINUX: "{{.*}}ld{{(.exe)?}}" @@ -366,7 +366,7 @@ // CFI by itself does not link runtime libraries. // RUN: %clang -fsanitize=cfi %s -### -o %t.o 2>&1 \ -// RUN: -target x86_64-unknown-linux -rtlib=platform \ +// RUN: -target x86_64-unknown-linux -fuse-ld=ld -rtlib=platform \ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck --check-prefix=CHECK-CFI-LINUX %s // CHECK-CFI-LINUX: "{{.*}}ld{{(.exe)?}}" @@ -375,7 +375,7 @@ // CFI with diagnostics links the UBSan runtime. // RUN: %clang -fsanitize=cfi -fno-sanitize-trap=cfi -fsanitize-recover=cfi \ // RUN: %s -### -o %t.o 2>&1\ -// RUN: -target x86_64-unknown-linux \ +// RUN: -target x86_64-unknown-linux -fuse-ld=ld \ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck --check-prefix=CHECK-CFI-DIAG-LINUX %s // CHECK-CFI-DIAG-LINUX: "{{.*}}ld{{(.exe)?}}" @@ -383,7 +383,7 @@ // Cross-DSO CFI links the CFI runtime. 
// RUN: %clang -fsanitize=cfi -fsanitize-cfi-cross-dso %s -### -o %t.o 2>&1 \ -// RUN: -target x86_64-unknown-linux \ +// RUN: -target x86_64-unknown-linux -fuse-ld=ld \ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck --check-prefix=CHECK-CFI-CROSS-DSO-LINUX %s // CHECK-CFI-CROSS-DSO-LINUX: "{{.*}}ld{{(.exe)?}}" @@ -393,7 +393,7 @@ // Cross-DSO CFI with diagnostics links just the CFI runtime. // RUN: %clang -fsanitize=cfi -fsanitize-cfi-cross-dso %s -### -o %t.o 2>&1 \ // RUN: -fno-sanitize-trap=cfi -fsanitize-recover=cfi \ -// RUN: -target x86_64-unknown-linux \ +// RUN: -target x86_64-unknown-linux -fuse-ld=ld \ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck --check-prefix=CHECK-CFI-CROSS-DSO-DIAG-LINUX %s // CHECK-CFI-CROSS-DSO-DIAG-LINUX: "{{.*}}ld{{(.exe)?}}" @@ -402,7 +402,7 @@ // RUN: %clangxx -fsanitize=address %s -### -o %t.o 2>&1 \ // RUN: -mmacosx-version-min=10.6 \ -// RUN: -target x86_64-apple-darwin13.4.0 -stdlib=platform \ +// RUN: -target x86_64-apple-darwin13.4.0 -fuse-ld=ld -stdlib=platform \ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck --check-prefix=CHECK-ASAN-DARWIN106-CXX %s // CHECK-ASAN-DARWIN106-CXX: "{{.*}}ld{{(.exe)?}}" @@ -410,18 +410,20 @@ // CHECK-ASAN-DARWIN106-CXX-NOT: -lc++abi // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ -// RUN: -target x86_64-unknown-linux -fsanitize=safe-stack \ +// RUN: -target x86_64-unknown-linux -fuse-ld=ld -fsanitize=safe-stack \ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck --check-prefix=CHECK-SAFESTACK-LINUX %s // // CHECK-SAFESTACK-LINUX: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}" // CHECK-SAFESTACK-LINUX-NOT: "-lc" +// CHECK-SAFESTACK-LINUX-NOT: whole-archive // CHECK-SAFESTACK-LINUX: libclang_rt.safestack-x86_64.a" +// CHECK-SAFESTACK-LINUX: "-u" "__safestack_init" // CHECK-SAFESTACK-LINUX: "-lpthread" // CHECK-SAFESTACK-LINUX: "-ldl" // RUN: %clang -fsanitize=cfi -fsanitize-stats %s -### -o %t.o 2>&1 \ -// RUN: -target x86_64-unknown-linux \ +// RUN: -target x86_64-unknown-linux -fuse-ld=ld \ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck --check-prefix=CHECK-CFI-STATS-LINUX %s // CHECK-CFI-STATS-LINUX: "{{.*}}ld{{(.exe)?}}" @@ -430,7 +432,7 @@ // CHECK-CFI-STATS-LINUX: "{{[^"]*}}libclang_rt.stats-x86_64.a" // RUN: %clang -fsanitize=cfi -fsanitize-stats %s -### -o %t.o 2>&1 \ -// RUN: -target x86_64-apple-darwin \ +// RUN: -target x86_64-apple-darwin -fuse-ld=ld \ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck --check-prefix=CHECK-CFI-STATS-DARWIN %s // CHECK-CFI-STATS-DARWIN: "{{.*}}ld{{(.exe)?}}" @@ -454,7 +456,7 @@ // CHECK-CFI-STATS-WIN32: "--linker-option=/include:___sanitizer_stats_register" // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ -// RUN: -target arm-linux-androideabi -fsanitize=safe-stack \ +// RUN: -target arm-linux-androideabi -fuse-ld=ld -fsanitize=safe-stack \ // RUN: --sysroot=%S/Inputs/basic_android_tree \ // RUN: | FileCheck --check-prefix=CHECK-SAFESTACK-ANDROID-ARM %s // @@ -462,7 +464,7 @@ // CHECK-SAFESTACK-ANDROID-ARM-NOT: libclang_rt.safestack // RUN: %clang -no-canonical-prefixes %s -### -o %t.o -shared 2>&1 \ -// RUN: -target arm-linux-androideabi -fsanitize=safe-stack \ +// RUN: -target arm-linux-androideabi -fuse-ld=ld -fsanitize=safe-stack \ // RUN: --sysroot=%S/Inputs/basic_android_tree \ // RUN: | FileCheck --check-prefix=CHECK-SAFESTACK-SHARED-ANDROID-ARM %s // @@ -470,7 +472,7 @@ // CHECK-SAFESTACK-SHARED-ANDROID-ARM-NOT: libclang_rt.safestack 
// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ -// RUN: -target aarch64-linux-android -fsanitize=safe-stack \ +// RUN: -target aarch64-linux-android -fuse-ld=ld -fsanitize=safe-stack \ // RUN: --sysroot=%S/Inputs/basic_android_tree \ // RUN: | FileCheck --check-prefix=CHECK-SAFESTACK-ANDROID-AARCH64 %s // @@ -478,7 +480,7 @@ // CHECK-SAFESTACK-ANDROID-AARCH64-NOT: libclang_rt.safestack // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ -// RUN: -target arm-linux-androideabi -fsanitize=cfi \ +// RUN: -target arm-linux-androideabi -fuse-ld=ld -fsanitize=cfi \ // RUN: --sysroot=%S/Inputs/basic_android_tree \ // RUN: | FileCheck --check-prefix=CHECK-CFI-ANDROID %s // @@ -487,7 +489,7 @@ // CHECK-CFI-ANDROID-NOT: __cfi_check // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ -// RUN: -target arm-linux-androideabi -fsanitize=cfi \ +// RUN: -target arm-linux-androideabi -fuse-ld=ld -fsanitize=cfi \ // RUN: -fsanitize-cfi-cross-dso \ // RUN: --sysroot=%S/Inputs/basic_android_tree \ // RUN: | FileCheck --check-prefix=CHECK-CROSSDSO-CFI-ANDROID %s @@ -498,31 +500,31 @@ // CHECK-CROSSDSO-CFI-ANDROID-NOT: libclang_rt.cfi // RUN: %clang -fsanitize=undefined %s -### -o %t.o 2>&1 \ -// RUN: -target x86_64-scei-ps4 \ +// RUN: -target x86_64-scei-ps4 -fuse-ld=ld \ // RUN: -shared \ // RUN: | FileCheck --check-prefix=CHECK-UBSAN-PS4 %s // CHECK-UBSAN-PS4: "{{.*}}ld{{(.gold)?(.exe)?}}" // CHECK-UBSAN-PS4: -lSceDbgUBSanitizer_stub_weak // RUN: %clang -fsanitize=address %s -### -o %t.o 2>&1 \ -// RUN: -target x86_64-scei-ps4 \ +// RUN: -target x86_64-scei-ps4 -fuse-ld=ld \ // RUN: -shared \ // RUN: | FileCheck --check-prefix=CHECK-ASAN-PS4 %s // CHECK-ASAN-PS4: "{{.*}}ld{{(.gold)?(.exe)?}}" // CHECK-ASAN-PS4: -lSceDbgAddressSanitizer_stub_weak // RUN: %clang -fsanitize=address,undefined %s -### -o %t.o 2>&1 \ -// RUN: -target x86_64-scei-ps4 \ +// RUN: -target x86_64-scei-ps4 -fuse-ld=ld \ // RUN: -shared \ // RUN: | FileCheck --check-prefix=CHECK-AUBSAN-PS4 %s // CHECK-AUBSAN-PS4: "{{.*}}ld{{(.gold)?(.exe)?}}" // CHECK-AUBSAN-PS4: -lSceDbgAddressSanitizer_stub_weak // RUN: %clang -fsanitize=efficiency-cache-frag %s -### -o %t.o 2>&1 \ -// RUN: -target x86_64-unknown-linux \ +// RUN: -target x86_64-unknown-linux -fuse-ld=ld \ // RUN: | FileCheck --check-prefix=CHECK-ESAN-LINUX %s // RUN: %clang -fsanitize=efficiency-working-set %s -### -o %t.o 2>&1 \ -// RUN: -target x86_64-unknown-linux \ +// RUN: -target x86_64-unknown-linux -fuse-ld=ld \ // RUN: | FileCheck --check-prefix=CHECK-ESAN-LINUX %s // // CHECK-ESAN-LINUX: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
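Editor's note on the sanitizer-ld.c churn above: every RUN line gains -fuse-ld=ld, the same mechanical edit throughout. These tests grep the driver's -### output for an ld job and for the exact runtime archives it links, so pinning the linker presumably keeps CHECK lines such as "{{.*}}ld{{(.exe)?}}" stable when clang is configured with a different default linker (for example lld); the PS4 checks, which also accept ld.gold, keep their looser "{{.*}}ld{{(.gold)?(.exe)?}}" pattern.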