diff --git a/bolt/src/BinaryFunction.cpp b/bolt/src/BinaryFunction.cpp index 40e51eb676bb..a591a87c98fc 100644 --- a/bolt/src/BinaryFunction.cpp +++ b/bolt/src/BinaryFunction.cpp @@ -1928,6 +1928,20 @@ void BinaryFunction::removeTagsFromProfile() { } } +void BinaryFunction::updateReferences(const MCSymbol *From, const MCSymbol *To) { + assert(CurrentState == State::Empty || CurrentState == State::Disassembled); + assert(From && To && "invalid symbols"); + + for (auto I = Instructions.begin(), E = Instructions.end(); I != E; ++I) { + auto &Inst = I->second; + for (int I = 0, E = MCPlus::getNumPrimeOperands(Inst); I != E; ++I) { + const MCSymbol *S = BC.MIB->getTargetSymbol(Inst, I); + if (S == From) + BC.MIB->setOperandToSymbolRef(Inst, I, To, 0, &*BC.Ctx, 0); + } + } +} + void BinaryFunction::addEntryPoint(uint64_t Address) { assert(containsAddress(Address) && "address does not belong to the function"); @@ -1943,6 +1957,8 @@ void BinaryFunction::addEntryPoint(uint64_t Address) { // If we haven't built CFG for the function, we can add a new entry point // even if it doesn't have an associated entry in the symbol table. if (CurrentState == State::Empty || CurrentState == State::Disassembled) { + auto Iter = Labels.find(Offset); + const MCSymbol *OldSym = Iter != Labels.end() ? Iter->second : nullptr; if (!EntrySymbol) { DEBUG(dbgs() << "creating local label\n"); EntrySymbol = getOrCreateLocalLabel(Address); @@ -1951,6 +1967,9 @@ void BinaryFunction::addEntryPoint(uint64_t Address) { } addEntryPointAtOffset(Address - getAddress()); Labels.emplace(Offset, EntrySymbol); + if (OldSym != nullptr && EntrySymbol != OldSym) { + updateReferences(OldSym, EntrySymbol); + } return; } diff --git a/bolt/src/BinaryFunction.h b/bolt/src/BinaryFunction.h index ee48e40ece40..bf5d8933400b 100644 --- a/bolt/src/BinaryFunction.h +++ b/bolt/src/BinaryFunction.h @@ -639,6 +639,10 @@ class BinaryFunction { return getOrCreateLocalLabel(getAddress() + Offset); } + /// Update all \p From references in the code to refer to \p To. Used + /// in disassembled state only. + void updateReferences(const MCSymbol *From, const MCSymbol *To); + /// This is called in disassembled state. void addEntryPoint(uint64_t Address); diff --git a/bolt/src/MCPlusBuilder.cpp b/bolt/src/MCPlusBuilder.cpp index a2de5dd55c9f..0e1949299066 100644 --- a/bolt/src/MCPlusBuilder.cpp +++ b/bolt/src/MCPlusBuilder.cpp @@ -439,3 +439,22 @@ MCPlusBuilder::getRegSize(MCPhysReg Reg) const { return SizeMap[Reg]; } + +bool MCPlusBuilder::setOperandToSymbolRef(MCInst &Inst, int OpNum, + const MCSymbol *Symbol, + int64_t Addend, MCContext *Ctx, + uint64_t RelType) const { + MCOperand Operand; + if (!Addend) { + Operand = MCOperand::createExpr(getTargetExprFor( + Inst, MCSymbolRefExpr::create(Symbol, *Ctx), *Ctx, RelType)); + } else { + Operand = MCOperand::createExpr(getTargetExprFor( + Inst, + MCBinaryExpr::createAdd(MCSymbolRefExpr::create(Symbol, *Ctx), + MCConstantExpr::create(Addend, *Ctx), *Ctx), + *Ctx, RelType)); + } + Inst.getOperand(OpNum) = Operand; + return true; +} diff --git a/bolt/src/MCPlusBuilder.h b/bolt/src/MCPlusBuilder.h index 4b08ed46cf62..43932d008dac 100644 --- a/bolt/src/MCPlusBuilder.h +++ b/bolt/src/MCPlusBuilder.h @@ -824,6 +824,12 @@ class MCPlusBuilder { return false; } + /// Discard operand \p OpNum replacing it by a new MCOperand that is a + /// MCExpr referencing \p Symbol + \p Addend. + virtual bool setOperandToSymbolRef(MCInst &Inst, int OpNum, + const MCSymbol *Symbol, int64_t Addend, + MCContext *Ctx, uint64_t RelType) const; + /// Replace an immediate operand in the instruction \p Inst with a reference /// of the passed \p Symbol plus \p Addend. If the instruction does not have /// an immediate operand or has more than one - then return false. Otherwise diff --git a/bolt/src/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/src/Target/AArch64/AArch64MCPlusBuilder.cpp index 44d1edd6857b..02cb252e0b8d 100644 --- a/bolt/src/Target/AArch64/AArch64MCPlusBuilder.cpp +++ b/bolt/src/Target/AArch64/AArch64MCPlusBuilder.cpp @@ -968,24 +968,6 @@ class AArch64MCPlusBuilder : public MCPlusBuilder { return true; } - bool setOperandToSymbolRef(MCInst &Inst, int OpNum, MCSymbol *Symbol, - int64_t Addend, MCContext *Ctx, - uint64_t RelType) const { - MCOperand Operand; - if (!Addend) { - Operand = MCOperand::createExpr(getTargetExprFor( - Inst, MCSymbolRefExpr::create(Symbol, *Ctx), *Ctx, RelType)); - } else { - Operand = MCOperand::createExpr(getTargetExprFor( - Inst, - MCBinaryExpr::createAdd(MCSymbolRefExpr::create(Symbol, *Ctx), - MCConstantExpr::create(Addend, *Ctx), *Ctx), - *Ctx, RelType)); - } - Inst.getOperand(OpNum) = Operand; - return true; - } - bool replaceImmWithSymbol(MCInst &Inst, MCSymbol *Symbol, int64_t Addend, MCContext *Ctx, int64_t &Value, uint64_t RelType) const override { diff --git a/bolt/src/Target/X86/X86MCPlusBuilder.cpp b/bolt/src/Target/X86/X86MCPlusBuilder.cpp index 3dfe7f3dc81b..7f9e88e2ba82 100644 --- a/bolt/src/Target/X86/X86MCPlusBuilder.cpp +++ b/bolt/src/Target/X86/X86MCPlusBuilder.cpp @@ -2770,18 +2770,8 @@ class X86MCPlusBuilder : public MCPlusBuilder { Value = Inst.getOperand(ImmOpNo).getImm(); - MCOperand Operand; - if (!Addend) { - Operand = MCOperand::createExpr(getTargetExprFor( - Inst, MCSymbolRefExpr::create(Symbol, *Ctx), *Ctx, RelType)); - } else { - Operand = MCOperand::createExpr(getTargetExprFor( - Inst, - MCBinaryExpr::createAdd(MCSymbolRefExpr::create(Symbol, *Ctx), - MCConstantExpr::create(Addend, *Ctx), *Ctx), - *Ctx, RelType)); - } - Inst.getOperand(ImmOpNo) = Operand; + setOperandToSymbolRef(Inst, ImmOpNo, Symbol, Addend, Ctx, RelType); + return true; } diff --git a/bolt/test/X86/Inputs/issue26.yaml b/bolt/test/X86/Inputs/issue26.yaml new file mode 100644 index 000000000000..6d30bb0d0b2b --- /dev/null +++ b/bolt/test/X86/Inputs/issue26.yaml @@ -0,0 +1,76 @@ +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 + Entry: 0x00000000004004FA +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x00000000004003E0 + AddressAlign: 0x0000000000000010 + Content: 31ED4989D15E4889E24883E4F0505449C7C07005400048C7C10005400048C7C7FA044000E8B7FFFFFFF4660F1F440000B82F10600055482D281060004883F80E4889E577025DC3B8000000004885C074F45DBF28106000FFE00F1F8000000000B82810600055482D2810600048C1F8034889E54889C248C1EA3F4801D048D1F875025DC3BA000000004885D274F45D4889C6BF28106000FFE20F1F8000000000803D9D0B2000007511554889E5E87EFFFFFF5DC6058A0B200001F3C30F1F400048833D7809200000741EB8000000004885C0741455BF200E60004889E5FFD05DE97BFFFFFF0F1F00E973FFFFFF648B0425B0FCFFFF39C70F850C0000004839160F850400000048890EC3B8FFFFFFFFC34839FE0F84F0FFFFFFC34831C0C3669041574189FF41564989F641554989D541544C8D25F808200055488D2DF8082000534C29E531DB48C1FD034883EC08E85DFEFFFF4885ED741E0F1F8400000000004C89EA4C89F64489FF41FF14DC4883C3014839EB75EA4883C4085B5D415C415D415E415FC390662E0F1F840000000000F3C3 + - Name: .rela.text + Type: SHT_RELA + Flags: [ SHF_INFO_LINK ] + Link: .symtab + AddressAlign: 0x0000000000000008 + Info: .text + Relocations: + - Name: .rodata + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + Address: 0x0000000000400580 + AddressAlign: 0x0000000000000008 + Content: '01000200000000000000000000000000' + - Name: .dynamic + Type: SHT_DYNAMIC + Flags: [ SHF_WRITE, SHF_ALLOC ] + Address: 0x0000000000600E28 + Link: .dynstr + AddressAlign: 0x0000000000000008 + Content: 010000000000000001000000000000000C0000000000000090034000000000000D0000000000000074054000000000001900000000000000100E6000000000001B0000000000000008000000000000001A00000000000000180E6000000000001C000000000000000800000000000000F5FEFF6F000000009802400000000000050000000000000000034000000000000600000000000000B8024000000000000A0000000000000038000000000000000B0000000000000018000000000000001500000000000000000000000000000003000000000000000010600000000000020000000000000018000000000000001400000000000000070000000000000017000000000000007803400000000000070000000000000060034000000000000800000000000000180000000000000009000000000000001800000000000000FEFFFF6F000000004003400000000000FFFFFF6F000000000100000000000000F0FFFF6F000000003803400000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +Symbols: + Global: + - Name: FUNC + Type: STT_FUNC + Section: .text + Value: 0x00000000004004F0 + Size: 0x000000000000000A + - Name: main + Type: STT_FUNC + Section: .text + Value: 0x00000000004004FA + Size: 0x0000000000000004 + - Name: XYZ + Type: STT_FUNC + Section: .text + Value: 0x00000000004004CD + Size: 0x0000000000000023 +DynamicSymbols: + Global: + - Name: mydata + Section: .rodata + Value: 0x0000000000400100 +ProgramHeaders: + - Type: PT_PHDR + Flags: [ PF_X, PF_R ] + VAddr: 0x00400000 + PAddr: 0x00400000 + Sections: + - Section: .text + - Type: PT_LOAD + Flags: [ PF_X, PF_R ] + VAddr: 0x00400000 + PAddr: 0x00400000 + Sections: + - Section: .text + - Type: PT_DYNAMIC + Flags: [ PF_X, PF_R ] + VAddr: 0x0061ADA8 + PAddr: 0x0064ADA8 + Sections: + - Section: .dynamic +... diff --git a/bolt/test/X86/issue26.test b/bolt/test/X86/issue26.test new file mode 100755 index 000000000000..e3e13e4e0eea --- /dev/null +++ b/bolt/test/X86/issue26.test @@ -0,0 +1,9 @@ +# This reproduces issue 26 from our github repo + +# RUN: yaml2obj %p/Inputs/issue26.yaml &> %t.exe +# RUN: llvm-bolt %t.exe -relocs -print-cfg -o %t.out \ +# RUN: | FileCheck %s + +CHECK-NOT: BOLT-WARNING: CFG invalid in XYZ @ .LBB0 +CHECK: Binary Function "XYZ" +CHECK: 0000000a: jne FUNCat0x4004e9