Skip to content
This repository has been archived by the owner on Jul 1, 2023. It is now read-only.

Commit

Permalink
Fix assembly after adding entry points
Browse files Browse the repository at this point in the history
Summary:
When a given function B, located after function A, references
one of A's basic blocks, it registers a new global symbol at the
reference address and update A's Labels vector via
BinaryFunction::addEntryPoint(). However, we don't update A's branch
targets at this point. So we end up with an inconsistent CFG, where the
basic block names are global symbols, but the internal branch operands
are still referencing the old local name of the corresponding blocks
that got promoted to an entry point. This patch fix this by detecting
this situation in addEntryPoint and iterating over all instructions,
looking for references to the old symbol and replacing them to use the
new global symbol (since this is now an entry point).

Fixes 26

(cherry picked from FBD8728407)
  • Loading branch information
rafaelauler committed Dec 13, 2021
1 parent 7f88288 commit 7422658
Show file tree
Hide file tree
Showing 8 changed files with 135 additions and 30 deletions.
19 changes: 19 additions & 0 deletions bolt/src/BinaryFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1928,6 +1928,20 @@ void BinaryFunction::removeTagsFromProfile() {
}
}

void BinaryFunction::updateReferences(const MCSymbol *From, const MCSymbol *To) {
assert(CurrentState == State::Empty || CurrentState == State::Disassembled);
assert(From && To && "invalid symbols");

for (auto I = Instructions.begin(), E = Instructions.end(); I != E; ++I) {
auto &Inst = I->second;
for (int I = 0, E = MCPlus::getNumPrimeOperands(Inst); I != E; ++I) {
const MCSymbol *S = BC.MIB->getTargetSymbol(Inst, I);
if (S == From)
BC.MIB->setOperandToSymbolRef(Inst, I, To, 0, &*BC.Ctx, 0);
}
}
}

void BinaryFunction::addEntryPoint(uint64_t Address) {
assert(containsAddress(Address) && "address does not belong to the function");

Expand All @@ -1943,6 +1957,8 @@ void BinaryFunction::addEntryPoint(uint64_t Address) {
// If we haven't built CFG for the function, we can add a new entry point
// even if it doesn't have an associated entry in the symbol table.
if (CurrentState == State::Empty || CurrentState == State::Disassembled) {
auto Iter = Labels.find(Offset);
const MCSymbol *OldSym = Iter != Labels.end() ? Iter->second : nullptr;
if (!EntrySymbol) {
DEBUG(dbgs() << "creating local label\n");
EntrySymbol = getOrCreateLocalLabel(Address);
Expand All @@ -1951,6 +1967,9 @@ void BinaryFunction::addEntryPoint(uint64_t Address) {
}
addEntryPointAtOffset(Address - getAddress());
Labels.emplace(Offset, EntrySymbol);
if (OldSym != nullptr && EntrySymbol != OldSym) {
updateReferences(OldSym, EntrySymbol);
}
return;
}

Expand Down
4 changes: 4 additions & 0 deletions bolt/src/BinaryFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -639,6 +639,10 @@ class BinaryFunction {
return getOrCreateLocalLabel(getAddress() + Offset);
}

/// Update all \p From references in the code to refer to \p To. Used
/// in disassembled state only.
void updateReferences(const MCSymbol *From, const MCSymbol *To);

/// This is called in disassembled state.
void addEntryPoint(uint64_t Address);

Expand Down
19 changes: 19 additions & 0 deletions bolt/src/MCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -439,3 +439,22 @@ MCPlusBuilder::getRegSize(MCPhysReg Reg) const {

return SizeMap[Reg];
}

bool MCPlusBuilder::setOperandToSymbolRef(MCInst &Inst, int OpNum,
const MCSymbol *Symbol,
int64_t Addend, MCContext *Ctx,
uint64_t RelType) const {
MCOperand Operand;
if (!Addend) {
Operand = MCOperand::createExpr(getTargetExprFor(
Inst, MCSymbolRefExpr::create(Symbol, *Ctx), *Ctx, RelType));
} else {
Operand = MCOperand::createExpr(getTargetExprFor(
Inst,
MCBinaryExpr::createAdd(MCSymbolRefExpr::create(Symbol, *Ctx),
MCConstantExpr::create(Addend, *Ctx), *Ctx),
*Ctx, RelType));
}
Inst.getOperand(OpNum) = Operand;
return true;
}
6 changes: 6 additions & 0 deletions bolt/src/MCPlusBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -824,6 +824,12 @@ class MCPlusBuilder {
return false;
}

/// Discard operand \p OpNum replacing it by a new MCOperand that is a
/// MCExpr referencing \p Symbol + \p Addend.
virtual bool setOperandToSymbolRef(MCInst &Inst, int OpNum,
const MCSymbol *Symbol, int64_t Addend,
MCContext *Ctx, uint64_t RelType) const;

/// Replace an immediate operand in the instruction \p Inst with a reference
/// of the passed \p Symbol plus \p Addend. If the instruction does not have
/// an immediate operand or has more than one - then return false. Otherwise
Expand Down
18 changes: 0 additions & 18 deletions bolt/src/Target/AArch64/AArch64MCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -968,24 +968,6 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
return true;
}

bool setOperandToSymbolRef(MCInst &Inst, int OpNum, MCSymbol *Symbol,
int64_t Addend, MCContext *Ctx,
uint64_t RelType) const {
MCOperand Operand;
if (!Addend) {
Operand = MCOperand::createExpr(getTargetExprFor(
Inst, MCSymbolRefExpr::create(Symbol, *Ctx), *Ctx, RelType));
} else {
Operand = MCOperand::createExpr(getTargetExprFor(
Inst,
MCBinaryExpr::createAdd(MCSymbolRefExpr::create(Symbol, *Ctx),
MCConstantExpr::create(Addend, *Ctx), *Ctx),
*Ctx, RelType));
}
Inst.getOperand(OpNum) = Operand;
return true;
}

bool replaceImmWithSymbol(MCInst &Inst, MCSymbol *Symbol, int64_t Addend,
MCContext *Ctx, int64_t &Value,
uint64_t RelType) const override {
Expand Down
14 changes: 2 additions & 12 deletions bolt/src/Target/X86/X86MCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2770,18 +2770,8 @@ class X86MCPlusBuilder : public MCPlusBuilder {

Value = Inst.getOperand(ImmOpNo).getImm();

MCOperand Operand;
if (!Addend) {
Operand = MCOperand::createExpr(getTargetExprFor(
Inst, MCSymbolRefExpr::create(Symbol, *Ctx), *Ctx, RelType));
} else {
Operand = MCOperand::createExpr(getTargetExprFor(
Inst,
MCBinaryExpr::createAdd(MCSymbolRefExpr::create(Symbol, *Ctx),
MCConstantExpr::create(Addend, *Ctx), *Ctx),
*Ctx, RelType));
}
Inst.getOperand(ImmOpNo) = Operand;
setOperandToSymbolRef(Inst, ImmOpNo, Symbol, Addend, Ctx, RelType);

return true;
}

Expand Down
76 changes: 76 additions & 0 deletions bolt/test/X86/Inputs/issue26.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
--- !ELF
FileHeader:
Class: ELFCLASS64
Data: ELFDATA2LSB
Type: ET_EXEC
Machine: EM_X86_64
Entry: 0x00000000004004FA
Sections:
- Name: .text
Type: SHT_PROGBITS
Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
Address: 0x00000000004003E0
AddressAlign: 0x0000000000000010
Content: 31ED4989D15E4889E24883E4F0505449C7C07005400048C7C10005400048C7C7FA044000E8B7FFFFFFF4660F1F440000B82F10600055482D281060004883F80E4889E577025DC3B8000000004885C074F45DBF28106000FFE00F1F8000000000B82810600055482D2810600048C1F8034889E54889C248C1EA3F4801D048D1F875025DC3BA000000004885D274F45D4889C6BF28106000FFE20F1F8000000000803D9D0B2000007511554889E5E87EFFFFFF5DC6058A0B200001F3C30F1F400048833D7809200000741EB8000000004885C0741455BF200E60004889E5FFD05DE97BFFFFFF0F1F00E973FFFFFF648B0425B0FCFFFF39C70F850C0000004839160F850400000048890EC3B8FFFFFFFFC34839FE0F84F0FFFFFFC34831C0C3669041574189FF41564989F641554989D541544C8D25F808200055488D2DF8082000534C29E531DB48C1FD034883EC08E85DFEFFFF4885ED741E0F1F8400000000004C89EA4C89F64489FF41FF14DC4883C3014839EB75EA4883C4085B5D415C415D415E415FC390662E0F1F840000000000F3C3
- Name: .rela.text
Type: SHT_RELA
Flags: [ SHF_INFO_LINK ]
Link: .symtab
AddressAlign: 0x0000000000000008
Info: .text
Relocations:
- Name: .rodata
Type: SHT_PROGBITS
Flags: [ SHF_ALLOC ]
Address: 0x0000000000400580
AddressAlign: 0x0000000000000008
Content: '01000200000000000000000000000000'
- Name: .dynamic
Type: SHT_DYNAMIC
Flags: [ SHF_WRITE, SHF_ALLOC ]
Address: 0x0000000000600E28
Link: .dynstr
AddressAlign: 0x0000000000000008
Content: 010000000000000001000000000000000C0000000000000090034000000000000D0000000000000074054000000000001900000000000000100E6000000000001B0000000000000008000000000000001A00000000000000180E6000000000001C000000000000000800000000000000F5FEFF6F000000009802400000000000050000000000000000034000000000000600000000000000B8024000000000000A0000000000000038000000000000000B0000000000000018000000000000001500000000000000000000000000000003000000000000000010600000000000020000000000000018000000000000001400000000000000070000000000000017000000000000007803400000000000070000000000000060034000000000000800000000000000180000000000000009000000000000001800000000000000FEFFFF6F000000004003400000000000FFFFFF6F000000000100000000000000F0FFFF6F000000003803400000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
Symbols:
Global:
- Name: FUNC
Type: STT_FUNC
Section: .text
Value: 0x00000000004004F0
Size: 0x000000000000000A
- Name: main
Type: STT_FUNC
Section: .text
Value: 0x00000000004004FA
Size: 0x0000000000000004
- Name: XYZ
Type: STT_FUNC
Section: .text
Value: 0x00000000004004CD
Size: 0x0000000000000023
DynamicSymbols:
Global:
- Name: mydata
Section: .rodata
Value: 0x0000000000400100
ProgramHeaders:
- Type: PT_PHDR
Flags: [ PF_X, PF_R ]
VAddr: 0x00400000
PAddr: 0x00400000
Sections:
- Section: .text
- Type: PT_LOAD
Flags: [ PF_X, PF_R ]
VAddr: 0x00400000
PAddr: 0x00400000
Sections:
- Section: .text
- Type: PT_DYNAMIC
Flags: [ PF_X, PF_R ]
VAddr: 0x0061ADA8
PAddr: 0x0064ADA8
Sections:
- Section: .dynamic
...
9 changes: 9 additions & 0 deletions bolt/test/X86/issue26.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# This reproduces issue 26 from our github repo

# RUN: yaml2obj %p/Inputs/issue26.yaml &> %t.exe
# RUN: llvm-bolt %t.exe -relocs -print-cfg -o %t.out \
# RUN: | FileCheck %s

CHECK-NOT: BOLT-WARNING: CFG invalid in XYZ @ .LBB0
CHECK: Binary Function "XYZ"
CHECK: 0000000a: jne FUNCat0x4004e9

0 comments on commit 7422658

Please sign in to comment.