Skip to content
Permalink
Browse files

cmd/link: insert trampolines for too-far jumps on ARM

ARM direct CALL/JMP instructions have a 24-bit offset, which can only
encode jumps within +/-32M. When the target is too far, the top
bits get truncated and the program jumps wild.

This CL detects too-far jumps and automatically inserts trampolines,
currently only for internal linking on ARM.

It is necessary to make the following changes to the linker:
- Resolve direct jump relocs when assigning addresses to functions.
  This allows trampoline insertion without moving all the code that
  has already been laid down.
- Lay down packages in dependency order, so that when resolving an
  inter-package direct jump reloc, the target address is already
  known. Intra-package jumps are assumed never too far.
- A linker flag -debugtramp is added for debugging trampolines:
    "-debugtramp=1 -v" prints trampoline debug messages
    "-debugtramp=2"    forces all inter-package jumps to use
                       trampolines (currently ARM only)
    "-debugtramp=2 -v" does both
- Some data structures are changed for bookkeeping.

On ARM, pseudo DIV/DIVU/MOD/MODU instructions now clobber R8
(unfortunate). In the standard library there is no ARM assembly
code that uses these instructions, and the compiler no longer emits
them (CL 29390).

all.bash passes with -debugtramp=2, except a disassembly test (this
is unavoidable as we changed the instruction).

TBD: debug info of trampolines?

Fixes #17028.

Change-Id: Idcce347ea7e0af77c4079041a160b2f6e114b474
Reviewed-on: https://go-review.googlesource.com/29397
Reviewed-by: David Crawshaw <crawshaw@golang.org>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
  • Loading branch information...
cherrymui committed Sep 14, 2016
1 parent d03e8b2 commit 7c431cb7f9780fcaf58b9ef07028d5129e1e5fe7
@@ -565,7 +565,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym) {
p.To.Reg = REGTMP
p.To.Offset = 8 * 4 // offset of m.divmod

/* MOV b,REGTMP */
/* MOV b, R8 */
p = obj.Appendp(ctxt, p)
p.As = AMOVW
p.Lineno = q1.Lineno
@@ -575,7 +575,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym) {
p.From.Reg = q1.To.Reg
}
p.To.Type = obj.TYPE_REG
p.To.Reg = REGTMP
p.To.Reg = REG_R8
p.To.Offset = 0

/* CALL appropriate */
@@ -622,6 +622,20 @@ const (
R_ADDRMIPSTLS
)

// IsDirectJump reports whether r is a relocation for a direct jump.
//
// A direct jump is a CALL or JMP instruction whose target address is an
// immediate embedded in the instruction itself, possibly with limited width.
// An indirect jump, by contrast, is a CALL or JMP instruction that takes its
// target address from a register or from memory.
func (r RelocType) IsDirectJump() bool {
	return r == R_CALL ||
		r == R_CALLARM ||
		r == R_CALLARM64 ||
		r == R_CALLPOWER ||
		r == R_CALLMIPS ||
		r == R_JMPMIPS
}

type Auto struct {
Asym *LSym
Link *Auto
@@ -85,10 +85,10 @@ func gentext(ctxt *ld.Link) {
Addcall(ctxt, initfunc, addmoduledata)
// c: c3 retq
o(0xc3)
ctxt.Textp = append(ctxt.Textp, initfunc)
if ld.Buildmode == ld.BuildmodePlugin {
ctxt.Textp = append(ctxt.Textp, addmoduledata)
}
ctxt.Textp = append(ctxt.Textp, initfunc)
initarray_entry := ctxt.Syms.Lookup("go.link.addmoduledatainit", 0)
initarray_entry.Attr |= ld.AttrReachable
initarray_entry.Attr |= ld.AttrLocal
@@ -95,10 +95,10 @@ func gentext(ctxt *ld.Link) {
rel.Type = obj.R_PCREL
rel.Add = 4

ctxt.Textp = append(ctxt.Textp, initfunc)
if ld.Buildmode == ld.BuildmodePlugin {
ctxt.Textp = append(ctxt.Textp, addmoduledata)
}
ctxt.Textp = append(ctxt.Textp, initfunc)
initarray_entry := ctxt.Syms.Lookup("go.link.addmoduledatainit", 0)
initarray_entry.Attr |= ld.AttrReachable
initarray_entry.Attr |= ld.AttrLocal
@@ -411,6 +411,62 @@ func machoreloc1(s *ld.Symbol, r *ld.Reloc, sectoff int64) int {
return 0
}

// signext24 sign-extends the low 24 bits of x to a 32-bit signed integer.
// Any bits of x above bit 23 are discarded.
func signext24(x int64) int32 {
	// Number of bits in an int32 that lie above the 24-bit field.
	const spare = 32 - 24

	// Move bit 23 into the sign position, then let the arithmetic right
	// shift replicate it across the upper bits.
	shifted := int32(x) << spare
	return shifted >> spare
}

// trampoline converts the direct jump relocation r, found in function s, to
// refer to a trampoline if the jump target is too far to be encoded in the
// instruction's 24-bit offset field.
//
// Only R_CALLARM relocations are handled; any other relocation type is
// reported as an error. When -debugtramp is greater than 1, a trampoline is
// also used for every jump whose target symbol has a different File than s,
// regardless of distance.
func trampoline(ctxt *ld.Link, r *ld.Reloc, s *ld.Symbol) {
switch r.Type {
case obj.R_CALLARM:
// r.Add is the instruction
// low 24-bit encodes the target address
// t is the distance from the relocation site to the target, in 4-byte words.
t := (ld.Symaddr(r.Sym) + int64(signext24(r.Add&0xffffff)*4) - (s.Value + int64(r.Off))) / 4
if t > 0x7fffff || t < -0x800000 || (*ld.FlagDebugTramp > 1 && s.File != r.Sym.File) {
// direct call too far, need to insert trampoline
// offset is the byte offset from r.Sym that the trampoline must jump to.
// NOTE(review): the +2 words presumably compensates for the ARM PC reading
// 8 bytes ahead of the current instruction -- confirm.
offset := (signext24(r.Add&0xffffff) + 2) * 4
var tramp *ld.Symbol
// Trampoline symbols are named "<target><+offset>-tramp<i>"; walk i upward
// until a usable one is found.
for i := 0; ; i++ {
name := r.Sym.Name + fmt.Sprintf("%+d-tramp%d", offset, i)
tramp = ctxt.Syms.Lookup(name, int(r.Sym.Version))
if tramp.Value == 0 {
// either the trampoline does not exist -- we need to create one,
// or we found one whose address is not yet assigned -- this will be
// laid down immediately after the current function. use this one.
break
}

// Distance, in words, from the relocation site to the existing trampoline,
// using the same -8 byte adjustment that is written into r.Add below.
t = (ld.Symaddr(tramp) - 8 - (s.Value + int64(r.Off))) / 4
// NOTE(review): the upper bound here is exclusive (t < 0x7fffff), while the
// initial check above treats t == 0x7fffff as reachable. This only makes
// reuse slightly more conservative -- confirm whether that is intended.
if t >= -0x800000 && t < 0x7fffff {
// found an existing trampoline that is not too far
// we can just use it
break
}
}
if tramp.Type == 0 {
// trampoline does not exist, create one
ctxt.AddTramp(tramp)
tramp.Size = 12 // 3 words: two instructions followed by the target address
tramp.P = make([]byte, tramp.Size)
// From here on t holds the absolute address the trampoline jumps to.
t = ld.Symaddr(r.Sym) + int64(offset)
o1 := uint32(0xe5900000 | 11<<12 | 15<<16) // MOVW (R15), R11 // R15 is actual pc + 8
o2 := uint32(0xe12fff10 | 11) // JMP (R11)
o3 := uint32(t) // WORD $target
ld.SysArch.ByteOrder.PutUint32(tramp.P, o1)
ld.SysArch.ByteOrder.PutUint32(tramp.P[4:], o2)
ld.SysArch.ByteOrder.PutUint32(tramp.P[8:], o3)
}
// modify reloc to point to tramp, which will be resolved later
r.Sym = tramp
// Keep the top 8 bits of the instruction and replace the embedded 24-bit
// offset with 0xfffffe, i.e. -2 words (-8 bytes) relative to the trampoline.
r.Add = r.Add&0xff000000 | 0xfffffe // clear the offset embedded in the instruction
// Mark the relocation as not yet done so that it is processed again later.
r.Done = 0
}
default:
ld.Errorf(s, "trampoline called with non-jump reloc: %v", r.Type)
}
}

func archreloc(ctxt *ld.Link, r *ld.Reloc, s *ld.Symbol, val *int64) int {
if ld.Linkmode == ld.LinkExternal {
switch r.Type {
@@ -420,10 +476,7 @@ func archreloc(ctxt *ld.Link, r *ld.Reloc, s *ld.Symbol, val *int64) int {
// set up addend for eventual relocation via outer symbol.
rs := r.Sym

r.Xadd = r.Add
if r.Xadd&0x800000 != 0 {
r.Xadd |= ^0xffffff
}
r.Xadd = int64(signext24(r.Add & 0xffffff))
r.Xadd *= 4
for rs.Outer != nil {
r.Xadd += ld.Symaddr(rs) - ld.Symaddr(rs.Outer)
@@ -444,6 +497,10 @@ func archreloc(ctxt *ld.Link, r *ld.Reloc, s *ld.Symbol, val *int64) int {
r.Xadd -= ld.Symaddr(s) + int64(r.Off)
}

if r.Xadd/4 > 0x7fffff || r.Xadd/4 < -0x800000 {
ld.Errorf(s, "direct call too far %d", r.Xadd/4)
}

*val = int64(braddoff(int32(0xff000000&uint32(r.Add)), int32(0xffffff&uint32(r.Xadd/4))))
return 0
}
@@ -480,7 +537,13 @@ func archreloc(ctxt *ld.Link, r *ld.Reloc, s *ld.Symbol, val *int64) int {
return 0

case obj.R_CALLARM: // bl XXXXXX or b YYYYYY
*val = int64(braddoff(int32(0xff000000&uint32(r.Add)), int32(0xffffff&uint32((ld.Symaddr(r.Sym)+int64((uint32(r.Add))*4)-(s.Value+int64(r.Off)))/4))))
// r.Add is the instruction
// low 24-bit encodes the target address
t := (ld.Symaddr(r.Sym) + int64(signext24(r.Add&0xffffff)*4) - (s.Value + int64(r.Off))) / 4
if t > 0x7fffff || t < -0x800000 {
ld.Errorf(s, "direct call too far: %s %x", r.Sym.Name, t)
}
*val = int64(braddoff(int32(0xff000000&uint32(r.Add)), int32(0xffffff&t)))

return 0
}
@@ -50,6 +50,7 @@ func Init() {
ld.Thearch.Archinit = archinit
ld.Thearch.Archreloc = archreloc
ld.Thearch.Archrelocvariant = archrelocvariant
ld.Thearch.Trampoline = trampoline
ld.Thearch.Asmb = asmb
ld.Thearch.Elfreloc1 = elfreloc1
ld.Thearch.Elfsetupplt = elfsetupplt
@@ -118,7 +118,8 @@ func hostArchive(ctxt *Link, name string) {
pname := fmt.Sprintf("%s(%s)", name, arhdr.name)
l = atolwhex(arhdr.size)

h := ldobj(ctxt, f, "libgcc", l, pname, name, ArchiveObj)
libgcc := Library{Pkg: "libgcc"}
h := ldobj(ctxt, f, &libgcc, l, pname, name, ArchiveObj)
f.Seek(h.off, 0)
h.ld(ctxt, f, h.pkg, h.length, h.pn)
}
@@ -314,6 +314,48 @@ func listsort(l *Symbol) *Symbol {
return l
}

// isRuntimeDepPkg reports whether pkg is the runtime package or one of the
// packages it depends on: sync/atomic, or any non-test package under
// runtime/internal/.
func isRuntimeDepPkg(pkg string) bool {
	// runtime may call into sync/atomic, due to go:linkname.
	if pkg == "runtime" || pkg == "sync/atomic" {
		return true
	}
	if !strings.HasPrefix(pkg, "runtime/internal/") {
		return false
	}
	return !strings.HasSuffix(pkg, "_test")
}

// trampoline walks the direct-jump relocations of function s and hands each
// one whose target can be examined to the architecture hook, which may
// redirect a too-far jump through a trampoline.
// It does nothing when the architecture provides no Trampoline hook
// (currently only ARM does) or when linking externally.
func trampoline(ctxt *Link, s *Symbol) {
	if Thearch.Trampoline == nil || Linkmode == LinkExternal {
		// Either there is no need for / no support of trampolines on this
		// arch, or we are not linking internally, which is the only mode
		// currently supported.
		return
	}

	for i := range s.R {
		rel := &s.R[i]
		if !rel.Type.IsDirectJump() {
			continue
		}

		target := rel.Sym
		if Symaddr(target) != 0 || target.Type == obj.SDYNIMPORT {
			Thearch.Trampoline(ctxt, rel, s)
			continue
		}

		// The target has no address yet. Within a single package that is
		// accepted silently. Across packages it is an error, unless both
		// sides are runtime or one of its dependent packages: those may call
		// each other and are fine, as they will be laid down together.
		if target.File != s.File && !(isRuntimeDepPkg(s.File) && isRuntimeDepPkg(target.File)) {
			Errorf(s, "unresolved inter-package jump to %s(%s)", target, target.File)
		}
	}
}

// resolve relocations in s.
func relocsym(ctxt *Link, s *Symbol) {
var r *Reloc
var rs *Symbol
@@ -325,6 +367,7 @@ func relocsym(ctxt *Link, s *Symbol) {

for ri := int32(0); ri < int32(len(s.R)); ri++ {
r = &s.R[ri]

r.Done = 1
off = r.Off
siz = int32(r.Siz)
@@ -1978,52 +2021,86 @@ func (ctxt *Link) textaddress() {
va := uint64(*FlagTextAddr)
n := 1
sect.Vaddr = va
ntramps := 0
for _, sym := range ctxt.Textp {
sym.Sect = sect
if sym.Type&obj.SSUB != 0 {
continue
}
if sym.Align != 0 {
va = uint64(Rnd(int64(va), int64(sym.Align)))
} else {
va = uint64(Rnd(int64(va), int64(Funcalign)))
}
sym.Value = 0
for sub := sym; sub != nil; sub = sub.Sub {
sub.Value += int64(va)
sect, n, va = assignAddress(ctxt, sect, n, sym, va)

trampoline(ctxt, sym) // resolve jumps, may add trampolines if jump too far

// lay down trampolines after each function
for ; ntramps < len(ctxt.tramps); ntramps++ {
tramp := ctxt.tramps[ntramps]
sect, n, va = assignAddress(ctxt, sect, n, tramp, va)
}
funcsize := uint64(MINFUNC) // spacing required for findfunctab
if sym.Size > MINFUNC {
funcsize = uint64(sym.Size)
}

sect.Length = va - sect.Vaddr
ctxt.Syms.Lookup("runtime.etext", 0).Sect = sect

// merge tramps into Textp, keeping Textp in address order
if ntramps != 0 {
newtextp := make([]*Symbol, 0, len(ctxt.Textp)+ntramps)
i := 0
for _, sym := range ctxt.Textp {
for ; i < ntramps && ctxt.tramps[i].Value < sym.Value; i++ {
newtextp = append(newtextp, ctxt.tramps[i])
}
newtextp = append(newtextp, sym)
}
newtextp = append(newtextp, ctxt.tramps[i:ntramps]...)

ctxt.Textp = newtextp
}
}

// assigns address for a text symbol, returns (possibly new) section, its number, and the address
// Note: once we have trampoline insertion support for external linking, this function
// will not need to create new text sections, and so no need to return sect and n.
func assignAddress(ctxt *Link, sect *Section, n int, sym *Symbol, va uint64) (*Section, int, uint64) {
sym.Sect = sect
if sym.Type&obj.SSUB != 0 {
return sect, n, va
}
if sym.Align != 0 {
va = uint64(Rnd(int64(va), int64(sym.Align)))
} else {
va = uint64(Rnd(int64(va), int64(Funcalign)))
}
sym.Value = 0
for sub := sym; sub != nil; sub = sub.Sub {
sub.Value += int64(va)
}

funcsize := uint64(MINFUNC) // spacing required for findfunctab
if sym.Size > MINFUNC {
funcsize = uint64(sym.Size)
}

// On ppc64x a text section should not be larger than 2^26 bytes due to the size of
// call target offset field in the bl instruction. Splitting into smaller text
// sections smaller than this limit allows the GNU linker to modify the long calls
// appropriately. The limit allows for the space needed for tables inserted by the linker.
// On ppc64x a text section should not be larger than 2^26 bytes due to the size of
// call target offset field in the bl instruction. Splitting into smaller text
// sections smaller than this limit allows the GNU linker to modify the long calls
// appropriately. The limit allows for the space needed for tables inserted by the linker.

// If this function doesn't fit in the current text section, then create a new one.
// If this function doesn't fit in the current text section, then create a new one.

// Only break at outermost syms.
// Only break at outermost syms.

if SysArch.InFamily(sys.PPC64) && sym.Outer == nil && Iself && Linkmode == LinkExternal && va-sect.Vaddr+funcsize > 0x1c00000 {
if SysArch.InFamily(sys.PPC64) && sym.Outer == nil && Iself && Linkmode == LinkExternal && va-sect.Vaddr+funcsize > 0x1c00000 {

// Set the length for the previous text section
sect.Length = va - sect.Vaddr
// Set the length for the previous text section
sect.Length = va - sect.Vaddr

// Create new section, set the starting Vaddr
sect = addsection(&Segtext, ".text", 05)
sect.Vaddr = va
// Create new section, set the starting Vaddr
sect = addsection(&Segtext, ".text", 05)
sect.Vaddr = va

// Create a symbol for the start of the secondary text sections
ctxt.Syms.Lookup(fmt.Sprintf("runtime.text.%d", n), 0).Sect = sect
n++
}
va += funcsize
// Create a symbol for the start of the secondary text sections
ctxt.Syms.Lookup(fmt.Sprintf("runtime.text.%d", n), 0).Sect = sect
n++
}
va += funcsize

sect.Length = va - sect.Vaddr
ctxt.Syms.Lookup("runtime.etext", 0).Sect = sect
return sect, n, va
}

// assign addresses
@@ -2246,3 +2323,14 @@ func (ctxt *Link) address() {
ctxt.xdefine("runtime.enoptrbss", obj.SNOPTRBSS, int64(noptrbss.Vaddr+noptrbss.Length))
ctxt.xdefine("runtime.end", obj.SBSS, int64(Segdata.Vaddr+Segdata.Length))
}

// AddTramp records s as a trampoline, to be laid down after the current
// function. The symbol is turned into a text symbol, flagged as reachable
// and on-list, and appended to ctxt.tramps. A log line is printed when both
// -debugtramp and -v are enabled.
func (ctxt *Link) AddTramp(s *Symbol) {
	s.Type = obj.STEXT
	s.Attr |= AttrReachable | AttrOnList
	ctxt.tramps = append(ctxt.tramps, s)

	verbose := *FlagDebugTramp > 0 && ctxt.Debugvlog > 0
	if !verbose {
		return
	}
	ctxt.Logf("trampoline %s inserted\n", s)
}
@@ -7,7 +7,9 @@ package ld
import (
"cmd/internal/obj"
"cmd/internal/sys"
"flag"
"fmt"
"path/filepath"
"strings"
"unicode"
)
@@ -244,7 +246,8 @@ func (d *deadcodepass) init() {
if *FlagLinkshared && (Buildmode == BuildmodeExe || Buildmode == BuildmodePIE) {
names = append(names, "main.main", "main.init")
} else if Buildmode == BuildmodePlugin {
pluginInit := d.ctxt.Library[0].Pkg + ".init"
pluginName := strings.TrimSuffix(filepath.Base(flag.Arg(0)), ".a")
pluginInit := pluginName + ".init"
names = append(names, pluginInit, "go.plugin.tabs")

// We don't keep the go.plugin.exports symbol,

0 comments on commit 7c431cb

Please sign in to comment.
You can’t perform that action at this time.