From 90696d17f2d6fda87d1cb4f75cc35015ba2795c9 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Sat, 18 Jan 2025 06:18:46 +0100 Subject: [PATCH 01/11] [clang][bytecode][NFC] Simplify visitDeclRef (#123380) Try to reduce indentation here. --- clang/lib/AST/ByteCode/Compiler.cpp | 105 +++++++++++++++------------- 1 file changed, 56 insertions(+), 49 deletions(-) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 3ef2b0858e667..7afae97f308ad 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -6194,60 +6194,67 @@ bool Compiler::visitDeclRef(const ValueDecl *D, const Expr *E) { return revisit(VD); } - if (D != InitializingDecl) { - // Try to lazily visit (or emit dummy pointers for) declarations - // we haven't seen yet. - if (Ctx.getLangOpts().CPlusPlus) { - if (const auto *VD = dyn_cast(D)) { - const auto typeShouldBeVisited = [&](QualType T) -> bool { - if (T.isConstant(Ctx.getASTContext())) - return true; - return T->isReferenceType(); - }; + // Avoid infinite recursion. + if (D == InitializingDecl) + return this->emitDummyPtr(D, E); + + // Try to lazily visit (or emit dummy pointers for) declarations + // we haven't seen yet. + // For C. + if (!Ctx.getLangOpts().CPlusPlus) { + if (const auto *VD = dyn_cast(D); + VD && VD->getAnyInitializer() && + VD->getType().isConstant(Ctx.getASTContext()) && !VD->isWeak()) + return revisit(VD); + return this->emitDummyPtr(D, E); + } - // DecompositionDecls are just proxies for us. - if (isa(VD)) - return revisit(VD); - - if ((VD->hasGlobalStorage() || VD->isStaticDataMember()) && - typeShouldBeVisited(VD->getType())) { - if (const Expr *Init = VD->getAnyInitializer(); - Init && !Init->isValueDependent()) { - // Whether or not the evaluation is successul doesn't really matter - // here -- we will create a global variable in any case, and that - // will have the state of initializer evaluation attached. - APValue V; - SmallVector Notes; - (void)Init->EvaluateAsInitializer(V, Ctx.getASTContext(), VD, Notes, - true); - return this->visitDeclRef(D, E); - } - return revisit(VD); - } + // ... and C++. + const auto *VD = dyn_cast(D); + if (!VD) + return this->emitDummyPtr(D, E); - // FIXME: The evaluateValue() check here is a little ridiculous, since - // it will ultimately call into Context::evaluateAsInitializer(). In - // other words, we're evaluating the initializer, just to know if we can - // evaluate the initializer. - if (VD->isLocalVarDecl() && typeShouldBeVisited(VD->getType()) && - VD->getInit() && !VD->getInit()->isValueDependent()) { + const auto typeShouldBeVisited = [&](QualType T) -> bool { + if (T.isConstant(Ctx.getASTContext())) + return true; + return T->isReferenceType(); + }; - if (VD->evaluateValue()) - return revisit(VD); + // DecompositionDecls are just proxies for us. + if (isa(VD)) + return revisit(VD); + + if ((VD->hasGlobalStorage() || VD->isStaticDataMember()) && + typeShouldBeVisited(VD->getType())) { + if (const Expr *Init = VD->getAnyInitializer(); + Init && !Init->isValueDependent()) { + // Whether or not the evaluation is successul doesn't really matter + // here -- we will create a global variable in any case, and that + // will have the state of initializer evaluation attached. + APValue V; + SmallVector Notes; + (void)Init->EvaluateAsInitializer(V, Ctx.getASTContext(), VD, Notes, + true); + return this->visitDeclRef(D, E); + } + return revisit(VD); + } + + // FIXME: The evaluateValue() check here is a little ridiculous, since + // it will ultimately call into Context::evaluateAsInitializer(). In + // other words, we're evaluating the initializer, just to know if we can + // evaluate the initializer. + if (VD->isLocalVarDecl() && typeShouldBeVisited(VD->getType()) && + VD->getInit() && !VD->getInit()->isValueDependent()) { + + if (VD->evaluateValue()) + return revisit(VD); - if (!D->getType()->isReferenceType()) - return this->emitDummyPtr(D, E); + if (!D->getType()->isReferenceType()) + return this->emitDummyPtr(D, E); - return this->emitInvalidDeclRef(cast(E), - /*InitializerFailed=*/true, E); - } - } - } else { - if (const auto *VD = dyn_cast(D); - VD && VD->getAnyInitializer() && - VD->getType().isConstant(Ctx.getASTContext()) && !VD->isWeak()) - return revisit(VD); - } + return this->emitInvalidDeclRef(cast(E), + /*InitializerFailed=*/true, E); } return this->emitDummyPtr(D, E); From 9cd12b5652ec0bcf8670aa7c8ddfddf9212aa94c Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 17 Jan 2025 21:41:04 -0800 Subject: [PATCH 02/11] [TableGen] Inline a helper function that didn't seem necessary. NFC (#123440) The function called find_if and converted the iterator to an index. The caller then had to check the index being non-zero to know if the find succeeded. Seems better to just do the find and distance in the caller. --- .../utils/TableGen/Common/CodeGenSchedule.cpp | 23 ++++++------------- llvm/utils/TableGen/Common/CodeGenSchedule.h | 1 - 2 files changed, 7 insertions(+), 17 deletions(-) diff --git a/llvm/utils/TableGen/Common/CodeGenSchedule.cpp b/llvm/utils/TableGen/Common/CodeGenSchedule.cpp index a5ca060533bce..1d1de88499085 100644 --- a/llvm/utils/TableGen/Common/CodeGenSchedule.cpp +++ b/llvm/utils/TableGen/Common/CodeGenSchedule.cpp @@ -809,18 +809,6 @@ void CodeGenSchedModels::expandRWSeqForProc( } } -// Find the existing SchedWrite that models this sequence of writes. -unsigned CodeGenSchedModels::findRWForSequence(ArrayRef Seq, - bool IsRead) { - std::vector &RWVec = IsRead ? SchedReads : SchedWrites; - - auto I = find_if(RWVec, [Seq](CodeGenSchedRW &RW) { - return ArrayRef(RW.Sequence) == Seq; - }); - // Index zero reserved for invalid RW. - return I == RWVec.end() ? 0 : std::distance(RWVec.begin(), I); -} - /// Add this ReadWrite if it doesn't already exist. unsigned CodeGenSchedModels::findOrInsertRW(ArrayRef Seq, bool IsRead) { @@ -828,11 +816,14 @@ unsigned CodeGenSchedModels::findOrInsertRW(ArrayRef Seq, if (Seq.size() == 1) return Seq.back(); - unsigned Idx = findRWForSequence(Seq, IsRead); - if (Idx) - return Idx; - std::vector &RWVec = IsRead ? SchedReads : SchedWrites; + + auto I = find_if(RWVec, [Seq](CodeGenSchedRW &RW) { + return ArrayRef(RW.Sequence) == Seq; + }); + if (I != RWVec.end()) + return std::distance(RWVec.begin(), I); + unsigned RWIdx = RWVec.size(); CodeGenSchedRW SchedRW(RWIdx, IsRead, Seq, genRWName(Seq, IsRead)); RWVec.push_back(SchedRW); diff --git a/llvm/utils/TableGen/Common/CodeGenSchedule.h b/llvm/utils/TableGen/Common/CodeGenSchedule.h index f43c856b274ce..d47c03514b155 100644 --- a/llvm/utils/TableGen/Common/CodeGenSchedule.h +++ b/llvm/utils/TableGen/Common/CodeGenSchedule.h @@ -592,7 +592,6 @@ class CodeGenSchedModels { void collectSchedRW(); std::string genRWName(ArrayRef Seq, bool IsRead); - unsigned findRWForSequence(ArrayRef Seq, bool IsRead); void collectSchedClasses(); From 23746c2f6d12a039a79625a40e6727bb67b87a3c Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 17 Jan 2025 21:41:34 -0800 Subject: [PATCH 03/11] [TableGen] Use vector constructor instead of calling append or emplace_back on an empty vector. NFC (#123442) --- llvm/utils/TableGen/Common/CodeGenSchedule.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/llvm/utils/TableGen/Common/CodeGenSchedule.cpp b/llvm/utils/TableGen/Common/CodeGenSchedule.cpp index 1d1de88499085..0810d15fb35ea 100644 --- a/llvm/utils/TableGen/Common/CodeGenSchedule.cpp +++ b/llvm/utils/TableGen/Common/CodeGenSchedule.cpp @@ -1702,24 +1702,23 @@ void CodeGenSchedModels::inferFromRW(ArrayRef OperWrites, dbgs() << ") "); // Create a seed transition with an empty PredTerm and the expanded sequences // of SchedWrites for the current SchedClass. - std::vector LastTransitions; - LastTransitions.emplace_back(); + std::vector LastTransitions(1); for (unsigned WriteIdx : OperWrites) { IdxVec WriteSeq; expandRWSequence(WriteIdx, WriteSeq, /*IsRead=*/false); - LastTransitions[0].WriteSequences.emplace_back(); - SmallVectorImpl &Seq = LastTransitions[0].WriteSequences.back(); - Seq.append(WriteSeq.begin(), WriteSeq.end()); + SmallVectorImpl &Seq = + LastTransitions[0].WriteSequences.emplace_back(WriteSeq.begin(), + WriteSeq.end()); LLVM_DEBUG(dbgs() << "("; dumpIdxVec(Seq); dbgs() << ") "); } LLVM_DEBUG(dbgs() << " Reads: "); for (unsigned ReadIdx : OperReads) { IdxVec ReadSeq; expandRWSequence(ReadIdx, ReadSeq, /*IsRead=*/true); - LastTransitions[0].ReadSequences.emplace_back(); - SmallVectorImpl &Seq = LastTransitions[0].ReadSequences.back(); - Seq.append(ReadSeq.begin(), ReadSeq.end()); + SmallVectorImpl &Seq = + LastTransitions[0].ReadSequences.emplace_back(ReadSeq.begin(), + ReadSeq.end()); LLVM_DEBUG(dbgs() << "("; dumpIdxVec(Seq); dbgs() << ") "); } LLVM_DEBUG(dbgs() << '\n'); From 6628b5934df8209396b5946e268301416ad7d9f6 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 17 Jan 2025 21:41:52 -0800 Subject: [PATCH 04/11] [TableGen] Use a range-based for loop. NFC (#123443) --- llvm/utils/TableGen/Common/CodeGenSchedule.cpp | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/llvm/utils/TableGen/Common/CodeGenSchedule.cpp b/llvm/utils/TableGen/Common/CodeGenSchedule.cpp index 0810d15fb35ea..60e4363b3c0b9 100644 --- a/llvm/utils/TableGen/Common/CodeGenSchedule.cpp +++ b/llvm/utils/TableGen/Common/CodeGenSchedule.cpp @@ -1581,22 +1581,16 @@ bool PredTransitions::substituteVariants(const PredTransition &Trans) { // Visit each original write sequence. for (const auto &WriteSequence : Trans.WriteSequences) { // Push a new (empty) write sequence onto all partial Transitions. - for (std::vector::iterator I = TransVec.begin() + StartIdx, - E = TransVec.end(); - I != E; ++I) { - I->WriteSequences.emplace_back(); - } + for (auto &PT : drop_begin(TransVec, StartIdx)) + PT.WriteSequences.emplace_back(); Subst |= substituteVariantOperand(WriteSequence, /*IsRead=*/false, StartIdx); } // Visit each original read sequence. for (const auto &ReadSequence : Trans.ReadSequences) { // Push a new (empty) read sequence onto all partial Transitions. - for (std::vector::iterator I = TransVec.begin() + StartIdx, - E = TransVec.end(); - I != E; ++I) { - I->ReadSequences.emplace_back(); - } + for (auto &PT : drop_begin(TransVec, StartIdx)) + PT.ReadSequences.emplace_back(); Subst |= substituteVariantOperand(ReadSequence, /*IsRead=*/true, StartIdx); } return Subst; From 909bf38c1fea56aab91b1eb43b8c00c515157a53 Mon Sep 17 00:00:00 2001 From: Congcong Cai Date: Sat, 18 Jan 2025 14:14:50 +0800 Subject: [PATCH 05/11] [clang-tidy][NFC] remove unused field in UnusedUsingDeclsCheck (#123451) --- clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.h | 1 - 1 file changed, 1 deletion(-) diff --git a/clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.h b/clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.h index 7bdaf12e8aece..e5f766dbac56b 100644 --- a/clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.h +++ b/clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.h @@ -51,7 +51,6 @@ class UnusedUsingDeclsCheck : public ClangTidyCheck { std::vector Contexts; llvm::SmallPtrSet UsingTargetDeclsCache; - StringRef RawStringHeaderFileExtensions; FileExtensionsSet HeaderFileExtensions; }; From d79e3af8ad126e352338d4fe4e3b2a13c8d587dc Mon Sep 17 00:00:00 2001 From: Jie Fu Date: Sat, 18 Jan 2025 14:48:55 +0800 Subject: [PATCH 06/11] [TableGen] Fix unused-variable warnings in CodeGenSchedule.cpp (NFC) /llvm-project/llvm/utils/TableGen/Common/CodeGenSchedule.cpp:1704:32: error: unused variable 'Seq' [-Werror,-Wunused-variable] SmallVectorImpl &Seq = ^ /llvm-project/llvm/utils/TableGen/Common/CodeGenSchedule.cpp:1713:32: error: unused variable 'Seq' [-Werror,-Wunused-variable] SmallVectorImpl &Seq = ^ 2 errors generated. --- llvm/utils/TableGen/Common/CodeGenSchedule.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/utils/TableGen/Common/CodeGenSchedule.cpp b/llvm/utils/TableGen/Common/CodeGenSchedule.cpp index 60e4363b3c0b9..06ef259c65ccf 100644 --- a/llvm/utils/TableGen/Common/CodeGenSchedule.cpp +++ b/llvm/utils/TableGen/Common/CodeGenSchedule.cpp @@ -1701,7 +1701,7 @@ void CodeGenSchedModels::inferFromRW(ArrayRef OperWrites, for (unsigned WriteIdx : OperWrites) { IdxVec WriteSeq; expandRWSequence(WriteIdx, WriteSeq, /*IsRead=*/false); - SmallVectorImpl &Seq = + [[maybe_unused]] SmallVectorImpl &Seq = LastTransitions[0].WriteSequences.emplace_back(WriteSeq.begin(), WriteSeq.end()); LLVM_DEBUG(dbgs() << "("; dumpIdxVec(Seq); dbgs() << ") "); @@ -1710,7 +1710,7 @@ void CodeGenSchedModels::inferFromRW(ArrayRef OperWrites, for (unsigned ReadIdx : OperReads) { IdxVec ReadSeq; expandRWSequence(ReadIdx, ReadSeq, /*IsRead=*/true); - SmallVectorImpl &Seq = + [[maybe_unused]] SmallVectorImpl &Seq = LastTransitions[0].ReadSequences.emplace_back(ReadSeq.begin(), ReadSeq.end()); LLVM_DEBUG(dbgs() << "("; dumpIdxVec(Seq); dbgs() << ") "); From c3aa86c9de5dfcc40abad01eabb7f9a301b90a2e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 17 Jan 2025 22:54:23 -0800 Subject: [PATCH 07/11] [TableGen] const-correct a couple CodeGenSchedule methods. NFC --- llvm/utils/TableGen/Common/CodeGenSchedule.cpp | 6 +++--- llvm/utils/TableGen/Common/CodeGenSchedule.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/utils/TableGen/Common/CodeGenSchedule.cpp b/llvm/utils/TableGen/Common/CodeGenSchedule.cpp index 06ef259c65ccf..ce03bfc73e770 100644 --- a/llvm/utils/TableGen/Common/CodeGenSchedule.cpp +++ b/llvm/utils/TableGen/Common/CodeGenSchedule.cpp @@ -1740,8 +1740,8 @@ void CodeGenSchedModels::inferFromRW(ArrayRef OperWrites, // Check if any processor resource group contains all resource records in // SubUnits. -bool CodeGenSchedModels::hasSuperGroup(ConstRecVec &SubUnits, - CodeGenProcModel &PM) { +bool CodeGenSchedModels::hasSuperGroup(const ConstRecVec &SubUnits, + const CodeGenProcModel &PM) { for (const Record *ProcResourceDef : PM.ProcResourceDefs) { if (!ProcResourceDef->isSubClassOf("ProcResGroup")) continue; @@ -1759,7 +1759,7 @@ bool CodeGenSchedModels::hasSuperGroup(ConstRecVec &SubUnits, } // Verify that overlapping groups have a common supergroup. -void CodeGenSchedModels::verifyProcResourceGroups(CodeGenProcModel &PM) { +void CodeGenSchedModels::verifyProcResourceGroups(const CodeGenProcModel &PM) { for (unsigned i = 0, e = PM.ProcResourceDefs.size(); i < e; ++i) { if (!PM.ProcResourceDefs[i]->isSubClassOf("ProcResGroup")) continue; diff --git a/llvm/utils/TableGen/Common/CodeGenSchedule.h b/llvm/utils/TableGen/Common/CodeGenSchedule.h index d47c03514b155..01f267ae55fad 100644 --- a/llvm/utils/TableGen/Common/CodeGenSchedule.h +++ b/llvm/utils/TableGen/Common/CodeGenSchedule.h @@ -630,8 +630,8 @@ class CodeGenSchedModels { void inferFromItinClass(const Record *ItinClassDef, unsigned FromClassIdx); void inferFromInstRWs(unsigned SCIdx); - bool hasSuperGroup(ConstRecVec &SubUnits, CodeGenProcModel &PM); - void verifyProcResourceGroups(CodeGenProcModel &PM); + bool hasSuperGroup(const ConstRecVec &SubUnits, const CodeGenProcModel &PM); + void verifyProcResourceGroups(const CodeGenProcModel &PM); void collectProcResources(); From 10cfd54e6aedc9c97bad84d6f8d32910f591cd26 Mon Sep 17 00:00:00 2001 From: Kristof Beyls Date: Sat, 18 Jan 2025 06:56:15 +0000 Subject: [PATCH 08/11] [AArch64] Correct defs and uses on {PAC,AUT}I{A,B}171615 (#123354) I'm not adding tests for this, as I don't think we usually have tests to verify correct description of defs and uses in instructions? This fix will be tested when #122304 lands, as one of the regression tests in that PR fails without this fix. --- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 8215f3a4fdae1..40a6b8e4c8e64 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -2030,6 +2030,8 @@ let Predicates = [HasPAuthLR] in { // opcode2, opcode, asm def AUTIASPPCr : SignAuthOneReg<0b00001, 0b100100, "autiasppcr">; def AUTIBSPPCr : SignAuthOneReg<0b00001, 0b100101, "autibsppcr">; + } + let Defs = [X17], Uses = [X15, X16, X17] in { // opcode2, opcode, asm def PACIA171615 : SignAuthFixedRegs<0b00001, 0b100010, "pacia171615">; def PACIB171615 : SignAuthFixedRegs<0b00001, 0b100011, "pacib171615">; From 4aedb970097b7ade93127021206199dbb17a4134 Mon Sep 17 00:00:00 2001 From: David CARLIER Date: Sat, 18 Jan 2025 07:53:45 +0000 Subject: [PATCH 09/11] [compiler-rt][rtsan] getsockname interception. (#123409) --- compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp | 12 ++++++++++++ .../rtsan/tests/rtsan_test_interceptors_posix.cpp | 10 ++++++++++ 2 files changed, 22 insertions(+) diff --git a/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp b/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp index e3f3d12d7e521..7ab54c24a002f 100644 --- a/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp +++ b/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp @@ -829,6 +829,17 @@ INTERCEPTOR(int, getnameinfo, const struct sockaddr *sa, socklen_t salen, return REAL(getnameinfo)(sa, salen, host, hostlen, serv, servlen, flags); } +#if SANITIZER_INTERCEPT_GETSOCKNAME +INTERCEPTOR(int, getsockname, int socket, struct sockaddr *sa, + socklen_t *salen) { + __rtsan_notify_intercepted_call("getsockname"); + return REAL(getsockname)(socket, sa, salen); +} +#define RTSAN_MAYBE_INTERCEPT_GETSOCKNAME INTERCEPT_FUNCTION(getsockname) +#else +#define RTSAN_MAYBE_INTERCEPT_GETSOCKNAME +#endif + INTERCEPTOR(int, bind, int socket, const struct sockaddr *address, socklen_t address_len) { __rtsan_notify_intercepted_call("bind"); @@ -1189,6 +1200,7 @@ void __rtsan::InitializeInterceptors() { INTERCEPT_FUNCTION(shutdown); INTERCEPT_FUNCTION(socket); RTSAN_MAYBE_INTERCEPT_ACCEPT4; + RTSAN_MAYBE_INTERCEPT_GETSOCKNAME; RTSAN_MAYBE_INTERCEPT_SELECT; INTERCEPT_FUNCTION(pselect); diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp index c26643c6a2d63..0e03b19e80b6c 100644 --- a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp +++ b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp @@ -1153,6 +1153,16 @@ TEST(TestRtsanInterceptors, ShutdownOnASocketDiesWhenRealtime) { ExpectNonRealtimeSurvival(Func); } +#if SANITIZER_INTERCEPT_GETSOCKNAME +TEST(TestRtsanInterceptors, GetsocknameOnASocketDiesWhenRealtime) { + sockaddr addr{}; + socklen_t len{}; + auto Func = [&]() { getsockname(0, &addr, &len); }; + ExpectRealtimeDeath(Func, "getsockname"); + ExpectNonRealtimeSurvival(Func); +} +#endif + /* I/O Multiplexing */ From c3a935e3f967f8f22f5db240d145459ee621c1e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Sat, 18 Jan 2025 08:59:30 +0100 Subject: [PATCH 10/11] Revert "[clang][DebugInfo] Emit DW_AT_object_pointer on function declarations with explicit `this`" (#123455) Reverts llvm/llvm-project#122928 --- clang/lib/CodeGen/CGDebugInfo.cpp | 21 ++++++------------- .../CodeGenCXX/debug-info-object-pointer.cpp | 7 ++++--- llvm/include/llvm-c/DebugInfo.h | 11 ++++------ llvm/include/llvm/IR/DIBuilder.h | 6 +++--- llvm/lib/IR/DIBuilder.cpp | 8 ++----- llvm/lib/IR/DebugInfo.cpp | 9 ++++---- 6 files changed, 23 insertions(+), 39 deletions(-) diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 6cbcaf0384410..f88f56c98186d 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -2016,15 +2016,13 @@ llvm::DISubroutineType *CGDebugInfo::getOrCreateInstanceMethodType( // First element is always return type. For 'void' functions it is NULL. Elts.push_back(Args[0]); - const bool HasExplicitObjectParameter = ThisPtr.isNull(); - - // "this" pointer is always first argument. For explicit "this" - // parameters, it will already be in Args[1]. - if (!HasExplicitObjectParameter) { + // "this" pointer is always first argument. + // ThisPtr may be null if the member function has an explicit 'this' + // parameter. + if (!ThisPtr.isNull()) { llvm::DIType *ThisPtrType = getOrCreateType(ThisPtr, Unit); TypeCache[ThisPtr.getAsOpaquePtr()].reset(ThisPtrType); - ThisPtrType = - DBuilder.createObjectPointerType(ThisPtrType, /*Implicit=*/true); + ThisPtrType = DBuilder.createObjectPointerType(ThisPtrType); Elts.push_back(ThisPtrType); } @@ -2032,13 +2030,6 @@ llvm::DISubroutineType *CGDebugInfo::getOrCreateInstanceMethodType( for (unsigned i = 1, e = Args.size(); i != e; ++i) Elts.push_back(Args[i]); - // Attach FlagObjectPointer to the explicit "this" parameter. - if (HasExplicitObjectParameter) { - assert(Elts.size() >= 2 && Args.size() >= 2 && - "Expected at least return type and object parameter."); - Elts[1] = DBuilder.createObjectPointerType(Args[1], /*Implicit=*/false); - } - llvm::DITypeRefArray EltTypeArray = DBuilder.getOrCreateTypeArray(Elts); return DBuilder.createSubroutineType(EltTypeArray, OriginalFunc->getFlags(), @@ -5127,7 +5118,7 @@ llvm::DIType *CGDebugInfo::CreateSelfType(const QualType &QualTy, llvm::DIType *CachedTy = getTypeOrNull(QualTy); if (CachedTy) Ty = CachedTy; - return DBuilder.createObjectPointerType(Ty, /*Implicit=*/true); + return DBuilder.createObjectPointerType(Ty); } void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable( diff --git a/clang/test/CodeGenCXX/debug-info-object-pointer.cpp b/clang/test/CodeGenCXX/debug-info-object-pointer.cpp index 49079f5990996..594d4da791ee8 100644 --- a/clang/test/CodeGenCXX/debug-info-object-pointer.cpp +++ b/clang/test/CodeGenCXX/debug-info-object-pointer.cpp @@ -5,11 +5,12 @@ // CHECK: !DIDerivedType(tag: DW_TAG_pointer_type // CHECK-SAME: flags: DIFlagArtificial | DIFlagObjectPointer // +// // FIXME: DIFlagObjectPointer not attached to the explicit object +// // argument in the subprogram declaration. // CHECK: !DISubprogram(name: "explicit_this", // flags: DIFlagPrototyped -// -// CHECK: !DIDerivedType(tag: DW_TAG_rvalue_reference_type -// CHECK-SAME: flags: DIFlagObjectPointer) +// CHECK-NOT: DIFlagObjectPointer +// CHECK-NOT: DIFlagArtificial // // CHECK: !DILocalVariable(name: "this", arg: 1 // CHECK-SAME: flags: DIFlagArtificial | DIFlagObjectPointer diff --git a/llvm/include/llvm-c/DebugInfo.h b/llvm/include/llvm-c/DebugInfo.h index ac7ee5a7cc9a1..07f87d44088e7 100644 --- a/llvm/include/llvm-c/DebugInfo.h +++ b/llvm/include/llvm-c/DebugInfo.h @@ -870,16 +870,13 @@ LLVMDIBuilderCreateObjCProperty(LLVMDIBuilderRef Builder, LLVMMetadataRef Ty); /** - * Create a uniqued DIType* clone with FlagObjectPointer. If \c Implicit - * is true, then also set FlagArtificial. + * Create a uniqued DIType* clone with FlagObjectPointer and FlagArtificial set. * \param Builder The DIBuilder. * \param Type The underlying type to which this pointer points. - * \param Implicit Indicates whether this pointer was implicitly generated - * (i.e., not spelled out in source). */ -LLVMMetadataRef LLVMDIBuilderCreateObjectPointerType(LLVMDIBuilderRef Builder, - LLVMMetadataRef Type, - LLVMBool Implicit); +LLVMMetadataRef +LLVMDIBuilderCreateObjectPointerType(LLVMDIBuilderRef Builder, + LLVMMetadataRef Type); /** * Create debugging information entry for a qualified diff --git a/llvm/include/llvm/IR/DIBuilder.h b/llvm/include/llvm/IR/DIBuilder.h index 6c479415b9ed2..cb1150c269a1d 100644 --- a/llvm/include/llvm/IR/DIBuilder.h +++ b/llvm/include/llvm/IR/DIBuilder.h @@ -662,9 +662,9 @@ namespace llvm { /// Create a uniqued clone of \p Ty with FlagArtificial set. static DIType *createArtificialType(DIType *Ty); - /// Create a uniqued clone of \p Ty with FlagObjectPointer set. - /// If \p Implicit is true, also set FlagArtificial. - static DIType *createObjectPointerType(DIType *Ty, bool Implicit); + /// Create a uniqued clone of \p Ty with FlagObjectPointer and + /// FlagArtificial set. + static DIType *createObjectPointerType(DIType *Ty); /// Create a permanent forward-declared type. DICompositeType *createForwardDecl(unsigned Tag, StringRef Name, diff --git a/llvm/lib/IR/DIBuilder.cpp b/llvm/lib/IR/DIBuilder.cpp index d9bd4f11e89a3..b240a2a39de36 100644 --- a/llvm/lib/IR/DIBuilder.cpp +++ b/llvm/lib/IR/DIBuilder.cpp @@ -644,15 +644,11 @@ DIType *DIBuilder::createArtificialType(DIType *Ty) { return createTypeWithFlags(Ty, DINode::FlagArtificial); } -DIType *DIBuilder::createObjectPointerType(DIType *Ty, bool Implicit) { +DIType *DIBuilder::createObjectPointerType(DIType *Ty) { // FIXME: Restrict this to the nodes where it's valid. if (Ty->isObjectPointer()) return Ty; - DINode::DIFlags Flags = DINode::FlagObjectPointer; - - if (Implicit) - Flags |= DINode::FlagArtificial; - + DINode::DIFlags Flags = DINode::FlagObjectPointer | DINode::FlagArtificial; return createTypeWithFlags(Ty, Flags); } diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index 4ce518009bd3e..e5b45e0082a82 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -1432,11 +1432,10 @@ LLVMDIBuilderCreateObjCProperty(LLVMDIBuilderRef Builder, PropertyAttributes, unwrapDI(Ty))); } -LLVMMetadataRef LLVMDIBuilderCreateObjectPointerType(LLVMDIBuilderRef Builder, - LLVMMetadataRef Type, - LLVMBool Implicit) { - return wrap(unwrap(Builder)->createObjectPointerType(unwrapDI(Type), - Implicit)); +LLVMMetadataRef +LLVMDIBuilderCreateObjectPointerType(LLVMDIBuilderRef Builder, + LLVMMetadataRef Type) { + return wrap(unwrap(Builder)->createObjectPointerType(unwrapDI(Type))); } LLVMMetadataRef From 67c3f2b4303972a6dc8ada54efe1d5d80d119a51 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 18 Jan 2025 08:38:24 +0000 Subject: [PATCH 11/11] [X86] mayFoldIntoStore - peek through oneuse bitcase users to find a store node (#123366) mayFoldIntoStore currently just checks the direct (oneuse) user of a SDValue to check its stored, which prevents cases where we bitcast the value prior to storing (usually the bitcast will be removed later). This patch peeks up through oneuse BITCAST nodes chain to see if its eventually stored. The main use of mayFoldIntoStore is v8i16 EXTRACT_VECTOR_ELT lowering which will only use PEXTRW/PEXTRB for index0 extractions (vs the faster MOVD) if the extracted value will be folded into a store on SSE41+ targets. Fixes #107086 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 11 +++- .../CodeGen/X86/canonicalize-vars-f16-type.ll | 3 +- llvm/test/CodeGen/X86/cvt16.ll | 3 +- .../test/CodeGen/X86/fp-strict-scalar-fp16.ll | 6 +- llvm/test/CodeGen/X86/fp16-libcalls.ll | 60 +++++++------------ llvm/test/CodeGen/X86/half-constrained.ll | 16 ++--- llvm/test/CodeGen/X86/half-darwin.ll | 6 +- llvm/test/CodeGen/X86/half.ll | 12 ++-- llvm/test/CodeGen/X86/pr91005.ll | 3 +- llvm/test/CodeGen/X86/pr95278.ll | 3 +- .../CodeGen/X86/vector-half-conversions.ll | 6 +- 11 files changed, 50 insertions(+), 79 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 84736f18011a9..8fe2781b148fc 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2812,7 +2812,16 @@ bool X86::mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT, } bool X86::mayFoldIntoStore(SDValue Op) { - return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->user_begin()); + if (!Op.hasOneUse()) + return false; + // Peek through (oneuse) bitcast users + SDNode *User = *Op->user_begin(); + while (User->getOpcode() == ISD::BITCAST) { + if (!User->hasOneUse()) + return false; + User = *User->user_begin(); + } + return ISD::isNormalStore(User); } bool X86::mayFoldIntoZeroExtend(SDValue Op) { diff --git a/llvm/test/CodeGen/X86/canonicalize-vars-f16-type.ll b/llvm/test/CodeGen/X86/canonicalize-vars-f16-type.ll index e911a24d830f7..04087c4f0dd5e 100644 --- a/llvm/test/CodeGen/X86/canonicalize-vars-f16-type.ll +++ b/llvm/test/CodeGen/X86/canonicalize-vars-f16-type.ll @@ -53,8 +53,7 @@ define void @v_test_canonicalize__half(half addrspace(1)* %out) nounwind { ; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] ; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; AVX512-NEXT: vmovd %xmm0, %eax -; AVX512-NEXT: movw %ax, (%rdi) +; AVX512-NEXT: vpextrw $0, %xmm0, (%rdi) ; AVX512-NEXT: retq entry: %val = load half, half addrspace(1)* %out diff --git a/llvm/test/CodeGen/X86/cvt16.ll b/llvm/test/CodeGen/X86/cvt16.ll index efc457e35e7f3..c6c088297c0ea 100644 --- a/llvm/test/CodeGen/X86/cvt16.ll +++ b/llvm/test/CodeGen/X86/cvt16.ll @@ -34,8 +34,7 @@ define void @test1(float %src, ptr %dest) nounwind { ; F16C-LABEL: test1: ; F16C: # %bb.0: ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; F16C-NEXT: vmovd %xmm0, %eax -; F16C-NEXT: movw %ax, (%rdi) +; F16C-NEXT: vpextrw $0, %xmm0, (%rdi) ; F16C-NEXT: retq ; ; SOFTFLOAT-LABEL: test1: diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-fp16.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-fp16.ll index bf93c8a1f5b51..e1b677e855094 100644 --- a/llvm/test/CodeGen/X86/fp-strict-scalar-fp16.ll +++ b/llvm/test/CodeGen/X86/fp-strict-scalar-fp16.ll @@ -316,8 +316,7 @@ define void @fptrunc_float_to_f16(ptr %val, ptr%ret) nounwind strictfp { ; AVX: # %bb.0: ; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; AVX-NEXT: vmovd %xmm0, %eax -; AVX-NEXT: movw %ax, (%rsi) +; AVX-NEXT: vpextrw $0, %xmm0, (%rsi) ; AVX-NEXT: retq ; ; X86-LABEL: fptrunc_float_to_f16: @@ -411,8 +410,7 @@ define void @fsqrt_f16(ptr %a) nounwind strictfp { ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] ; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; AVX-NEXT: vmovd %xmm0, %eax -; AVX-NEXT: movw %ax, (%rdi) +; AVX-NEXT: vpextrw $0, %xmm0, (%rdi) ; AVX-NEXT: retq ; ; X86-LABEL: fsqrt_f16: diff --git a/llvm/test/CodeGen/X86/fp16-libcalls.ll b/llvm/test/CodeGen/X86/fp16-libcalls.ll index 0d8290b120fa4..3af8b1aec1feb 100644 --- a/llvm/test/CodeGen/X86/fp16-libcalls.ll +++ b/llvm/test/CodeGen/X86/fp16-libcalls.ll @@ -12,8 +12,7 @@ define void @test_half_ceil(half %a0, ptr %p0) nounwind { ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; F16C-NEXT: vmovd %xmm0, %eax -; F16C-NEXT: movw %ax, (%rdi) +; F16C-NEXT: vpextrw $0, %xmm0, (%rdi) ; F16C-NEXT: retq ; ; FP16-LABEL: test_half_ceil: @@ -108,8 +107,7 @@ define void @test_half_cos(half %a0, ptr %p0) nounwind { ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: callq cosf@PLT ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; F16C-NEXT: vmovd %xmm0, %eax -; F16C-NEXT: movw %ax, (%rbx) +; F16C-NEXT: vpextrw $0, %xmm0, (%rbx) ; F16C-NEXT: popq %rbx ; F16C-NEXT: retq ; @@ -167,8 +165,7 @@ define void @test_half_exp(half %a0, ptr %p0) nounwind { ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: callq expf@PLT ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; F16C-NEXT: vmovd %xmm0, %eax -; F16C-NEXT: movw %ax, (%rbx) +; F16C-NEXT: vpextrw $0, %xmm0, (%rbx) ; F16C-NEXT: popq %rbx ; F16C-NEXT: retq ; @@ -226,8 +223,7 @@ define void @test_half_exp2(half %a0, ptr %p0) nounwind { ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: callq exp2f@PLT ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; F16C-NEXT: vmovd %xmm0, %eax -; F16C-NEXT: movw %ax, (%rbx) +; F16C-NEXT: vpextrw $0, %xmm0, (%rbx) ; F16C-NEXT: popq %rbx ; F16C-NEXT: retq ; @@ -285,8 +281,7 @@ define void @test_half_exp10(half %a0, ptr %p0) nounwind { ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: callq exp10f@PLT ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; F16C-NEXT: vmovd %xmm0, %eax -; F16C-NEXT: movw %ax, (%rbx) +; F16C-NEXT: vpextrw $0, %xmm0, (%rbx) ; F16C-NEXT: popq %rbx ; F16C-NEXT: retq ; @@ -342,8 +337,7 @@ define void @test_half_fabs(half %a0, ptr %p0) nounwind { ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; F16C-NEXT: vmovd %xmm0, %eax -; F16C-NEXT: movw %ax, (%rdi) +; F16C-NEXT: vpextrw $0, %xmm0, (%rdi) ; F16C-NEXT: retq ; ; FP16-LABEL: test_half_fabs: @@ -383,8 +377,7 @@ define void @test_half_floor(half %a0, ptr %p0) nounwind { ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; F16C-NEXT: vmovd %xmm0, %eax -; F16C-NEXT: movw %ax, (%rdi) +; F16C-NEXT: vpextrw $0, %xmm0, (%rdi) ; F16C-NEXT: retq ; ; FP16-LABEL: test_half_floor: @@ -438,8 +431,7 @@ define void @test_half_fma(half %a0, half %a1, half %a2, ptr %p0) nounwind { ; F16C-NEXT: vcvtph2ps %xmm2, %xmm2 ; F16C-NEXT: callq fmaf@PLT ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; F16C-NEXT: vmovd %xmm0, %eax -; F16C-NEXT: movw %ax, (%rbx) +; F16C-NEXT: vpextrw $0, %xmm0, (%rbx) ; F16C-NEXT: popq %rbx ; F16C-NEXT: retq ; @@ -525,8 +517,7 @@ define void @test_half_fneg(half %a0, ptr %p0) nounwind { ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; F16C-NEXT: vmovd %xmm0, %eax -; F16C-NEXT: movw %ax, (%rdi) +; F16C-NEXT: vpextrw $0, %xmm0, (%rdi) ; F16C-NEXT: retq ; ; FP16-LABEL: test_half_fneg: @@ -568,8 +559,7 @@ define void @test_half_log(half %a0, ptr %p0) nounwind { ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: callq logf@PLT ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; F16C-NEXT: vmovd %xmm0, %eax -; F16C-NEXT: movw %ax, (%rbx) +; F16C-NEXT: vpextrw $0, %xmm0, (%rbx) ; F16C-NEXT: popq %rbx ; F16C-NEXT: retq ; @@ -627,8 +617,7 @@ define void @test_half_log2(half %a0, ptr %p0) nounwind { ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: callq log2f@PLT ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; F16C-NEXT: vmovd %xmm0, %eax -; F16C-NEXT: movw %ax, (%rbx) +; F16C-NEXT: vpextrw $0, %xmm0, (%rbx) ; F16C-NEXT: popq %rbx ; F16C-NEXT: retq ; @@ -686,8 +675,7 @@ define void @test_half_log10(half %a0, ptr %p0) nounwind { ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: callq log10f@PLT ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; F16C-NEXT: vmovd %xmm0, %eax -; F16C-NEXT: movw %ax, (%rbx) +; F16C-NEXT: vpextrw $0, %xmm0, (%rbx) ; F16C-NEXT: popq %rbx ; F16C-NEXT: retq ; @@ -743,8 +731,7 @@ define void @test_half_nearbyint(half %a0, ptr %p0) nounwind { ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; F16C-NEXT: vmovd %xmm0, %eax -; F16C-NEXT: movw %ax, (%rdi) +; F16C-NEXT: vpextrw $0, %xmm0, (%rdi) ; F16C-NEXT: retq ; ; FP16-LABEL: test_half_nearbyint: @@ -797,8 +784,7 @@ define void @test_half_pow(half %a0, half %a1, ptr %p0) nounwind { ; F16C-NEXT: vcvtph2ps %xmm1, %xmm1 ; F16C-NEXT: callq powf@PLT ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; F16C-NEXT: vmovd %xmm0, %eax -; F16C-NEXT: movw %ax, (%rbx) +; F16C-NEXT: vpextrw $0, %xmm0, (%rbx) ; F16C-NEXT: popq %rbx ; F16C-NEXT: retq ; @@ -876,8 +862,7 @@ define void @test_half_powi(half %a0, i32 %a1, ptr %p0) nounwind { ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: callq __powisf2@PLT ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; F16C-NEXT: vmovd %xmm0, %eax -; F16C-NEXT: movw %ax, (%rbx) +; F16C-NEXT: vpextrw $0, %xmm0, (%rbx) ; F16C-NEXT: popq %rbx ; F16C-NEXT: retq ; @@ -943,8 +928,7 @@ define void @test_half_rint(half %a0, ptr %p0) nounwind { ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; F16C-NEXT: vmovd %xmm0, %eax -; F16C-NEXT: movw %ax, (%rdi) +; F16C-NEXT: vpextrw $0, %xmm0, (%rdi) ; F16C-NEXT: retq ; ; FP16-LABEL: test_half_rint: @@ -996,8 +980,7 @@ define void @test_half_sin(half %a0, ptr %p0) nounwind { ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: callq sinf@PLT ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; F16C-NEXT: vmovd %xmm0, %eax -; F16C-NEXT: movw %ax, (%rbx) +; F16C-NEXT: vpextrw $0, %xmm0, (%rbx) ; F16C-NEXT: popq %rbx ; F16C-NEXT: retq ; @@ -1053,8 +1036,7 @@ define void @test_half_sqrt(half %a0, ptr %p0) nounwind { ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; F16C-NEXT: vmovd %xmm0, %eax -; F16C-NEXT: movw %ax, (%rdi) +; F16C-NEXT: vpextrw $0, %xmm0, (%rdi) ; F16C-NEXT: retq ; ; FP16-LABEL: test_half_sqrt: @@ -1107,8 +1089,7 @@ define void @test_half_tan(half %a0, ptr %p0) nounwind { ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: callq tanf@PLT ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; F16C-NEXT: vmovd %xmm0, %eax -; F16C-NEXT: movw %ax, (%rbx) +; F16C-NEXT: vpextrw $0, %xmm0, (%rbx) ; F16C-NEXT: popq %rbx ; F16C-NEXT: retq ; @@ -1164,8 +1145,7 @@ define void @test_half_trunc(half %a0, ptr %p0) nounwind { ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; F16C-NEXT: vmovd %xmm0, %eax -; F16C-NEXT: movw %ax, (%rdi) +; F16C-NEXT: vpextrw $0, %xmm0, (%rdi) ; F16C-NEXT: retq ; ; FP16-LABEL: test_half_trunc: diff --git a/llvm/test/CodeGen/X86/half-constrained.ll b/llvm/test/CodeGen/X86/half-constrained.ll index eae9b25e43e06..0f73129d984bd 100644 --- a/llvm/test/CodeGen/X86/half-constrained.ll +++ b/llvm/test/CodeGen/X86/half-constrained.ll @@ -176,8 +176,7 @@ define void @float_to_half(float %0) strictfp { ; X86-F16C: # %bb.0: ; X86-F16C-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X86-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; X86-F16C-NEXT: vmovd %xmm0, %eax -; X86-F16C-NEXT: movw %ax, a +; X86-F16C-NEXT: vpextrw $0, %xmm0, a ; X86-F16C-NEXT: retl ; ; X64-NOF16C-LABEL: float_to_half: @@ -197,9 +196,8 @@ define void @float_to_half(float %0) strictfp { ; X64-F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X64-F16C-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] ; X64-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; X64-F16C-NEXT: vmovd %xmm0, %eax -; X64-F16C-NEXT: movq a@GOTPCREL(%rip), %rcx -; X64-F16C-NEXT: movw %ax, (%rcx) +; X64-F16C-NEXT: movq a@GOTPCREL(%rip), %rax +; X64-F16C-NEXT: vpextrw $0, %xmm0, (%rax) ; X64-F16C-NEXT: retq %2 = tail call half @llvm.experimental.constrained.fptrunc.f16.f32(float %0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 store half %2, ptr @a, align 2 @@ -354,8 +352,7 @@ define void @add() strictfp { ; X86-F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X86-F16C-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] ; X86-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; X86-F16C-NEXT: vmovd %xmm0, %eax -; X86-F16C-NEXT: movw %ax, c +; X86-F16C-NEXT: vpextrw $0, %xmm0, c ; X86-F16C-NEXT: retl ; ; X64-NOF16C-LABEL: add: @@ -392,9 +389,8 @@ define void @add() strictfp { ; X64-F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X64-F16C-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] ; X64-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; X64-F16C-NEXT: vmovd %xmm0, %eax -; X64-F16C-NEXT: movq c@GOTPCREL(%rip), %rcx -; X64-F16C-NEXT: movw %ax, (%rcx) +; X64-F16C-NEXT: movq c@GOTPCREL(%rip), %rax +; X64-F16C-NEXT: vpextrw $0, %xmm0, (%rax) ; X64-F16C-NEXT: retq %1 = load half, ptr @a, align 2 %2 = tail call float @llvm.experimental.constrained.fpext.f32.f16(half %1, metadata !"fpexcept.strict") #0 diff --git a/llvm/test/CodeGen/X86/half-darwin.ll b/llvm/test/CodeGen/X86/half-darwin.ll index 7388429143df5..3cbf5c11235ea 100644 --- a/llvm/test/CodeGen/X86/half-darwin.ll +++ b/llvm/test/CodeGen/X86/half-darwin.ll @@ -16,8 +16,7 @@ define void @truncsfhf(float %in, ptr %ptr) nounwind { ; CHECK-F16C-LABEL: truncsfhf: ; CHECK-F16C: ## %bb.0: ; CHECK-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; CHECK-F16C-NEXT: vmovd %xmm0, %eax -; CHECK-F16C-NEXT: movw %ax, (%rdi) +; CHECK-F16C-NEXT: vpextrw $0, %xmm0, (%rdi) ; CHECK-F16C-NEXT: retq ; ; CHECK-FP16-LABEL: truncsfhf: @@ -108,8 +107,7 @@ define void @strict_truncsfhf(float %in, ptr %ptr) nounwind strictfp { ; CHECK-F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-F16C-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] ; CHECK-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; CHECK-F16C-NEXT: vmovd %xmm0, %eax -; CHECK-F16C-NEXT: movw %ax, (%rdi) +; CHECK-F16C-NEXT: vpextrw $0, %xmm0, (%rdi) ; CHECK-F16C-NEXT: retq ; ; CHECK-FP16-LABEL: strict_truncsfhf: diff --git a/llvm/test/CodeGen/X86/half.ll b/llvm/test/CodeGen/X86/half.ll index 7bac075e48680..6e7f109a5da5c 100644 --- a/llvm/test/CodeGen/X86/half.ll +++ b/llvm/test/CodeGen/X86/half.ll @@ -146,8 +146,7 @@ define void @test_trunc32(float %in, ptr %addr) #0 { ; BWON-F16C-LABEL: test_trunc32: ; BWON-F16C: # %bb.0: ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; BWON-F16C-NEXT: vmovd %xmm0, %eax -; BWON-F16C-NEXT: movw %ax, (%rdi) +; BWON-F16C-NEXT: vpextrw $0, %xmm0, (%rdi) ; BWON-F16C-NEXT: retq ; ; CHECK-I686-LABEL: test_trunc32: @@ -265,8 +264,7 @@ define void @test_sitofp_i64(i64 %a, ptr %p) #0 { ; BWON-F16C: # %bb.0: ; BWON-F16C-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; BWON-F16C-NEXT: vmovd %xmm0, %eax -; BWON-F16C-NEXT: movw %ax, (%rsi) +; BWON-F16C-NEXT: vpextrw $0, %xmm0, (%rsi) ; BWON-F16C-NEXT: retq ; ; CHECK-I686-LABEL: test_sitofp_i64: @@ -398,8 +396,7 @@ define void @test_uitofp_i64(i64 %a, ptr %p) #0 { ; BWON-F16C-NEXT: vaddss %xmm0, %xmm0, %xmm0 ; BWON-F16C-NEXT: .LBB10_3: ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; BWON-F16C-NEXT: vmovd %xmm0, %eax -; BWON-F16C-NEXT: movw %ax, (%rsi) +; BWON-F16C-NEXT: vpextrw $0, %xmm0, (%rsi) ; BWON-F16C-NEXT: retq ; ; CHECK-I686-LABEL: test_uitofp_i64: @@ -1075,8 +1072,7 @@ define void @main.158() #0 { ; BWON-F16C-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0] ; BWON-F16C-NEXT: .LBB20_2: # %entry ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; BWON-F16C-NEXT: vmovd %xmm0, %eax -; BWON-F16C-NEXT: movw %ax, (%rax) +; BWON-F16C-NEXT: vpextrw $0, %xmm0, (%rax) ; BWON-F16C-NEXT: retq ; ; CHECK-I686-LABEL: main.158: diff --git a/llvm/test/CodeGen/X86/pr91005.ll b/llvm/test/CodeGen/X86/pr91005.ll index 97fd1ce456882..d73cd7482c390 100644 --- a/llvm/test/CodeGen/X86/pr91005.ll +++ b/llvm/test/CodeGen/X86/pr91005.ll @@ -16,8 +16,7 @@ define void @PR91005(ptr %0) minsize { ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; CHECK-NEXT: vmovd %xmm0, %eax -; CHECK-NEXT: movw %ax, (%rdi) +; CHECK-NEXT: vpextrw $0, %xmm0, (%rdi) ; CHECK-NEXT: .LBB0_2: # %common.ret ; CHECK-NEXT: retq %2 = bitcast <2 x half> poison to <2 x i16> diff --git a/llvm/test/CodeGen/X86/pr95278.ll b/llvm/test/CodeGen/X86/pr95278.ll index 32783696f4692..104fc04d68cdb 100644 --- a/llvm/test/CodeGen/X86/pr95278.ll +++ b/llvm/test/CodeGen/X86/pr95278.ll @@ -8,8 +8,7 @@ define void @PR95278(ptr %p0, ptr %p1) { ; CHECK-NEXT: vextractf32x4 $3, %zmm0, %xmm0 ; CHECK-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] ; CHECK-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; CHECK-NEXT: vmovd %xmm0, %eax -; CHECK-NEXT: movw %ax, (%rsi) +; CHECK-NEXT: vpextrw $0, %xmm0, (%rsi) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %load = load <1024 x half>, ptr %p0, align 2 diff --git a/llvm/test/CodeGen/X86/vector-half-conversions.ll b/llvm/test/CodeGen/X86/vector-half-conversions.ll index ec916060563a7..4e50b56323311 100644 --- a/llvm/test/CodeGen/X86/vector-half-conversions.ll +++ b/llvm/test/CodeGen/X86/vector-half-conversions.ll @@ -2596,15 +2596,13 @@ define void @store_cvt_f32_to_i16(float %a0, ptr %a1) nounwind { ; F16C-LABEL: store_cvt_f32_to_i16: ; F16C: # %bb.0: ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; F16C-NEXT: vmovd %xmm0, %eax -; F16C-NEXT: movw %ax, (%rdi) +; F16C-NEXT: vpextrw $0, %xmm0, (%rdi) ; F16C-NEXT: retq ; ; AVX512-LABEL: store_cvt_f32_to_i16: ; AVX512: # %bb.0: ; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; AVX512-NEXT: vmovd %xmm0, %eax -; AVX512-NEXT: movw %ax, (%rdi) +; AVX512-NEXT: vpextrw $0, %xmm0, (%rdi) ; AVX512-NEXT: retq %1 = fptrunc float %a0 to half %2 = bitcast half %1 to i16