stubs.go
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Apache License 2.0.
* See the file "LICENSE" for details.
*/
package hotspot
import (
    "encoding/binary"
    "fmt"
    "runtime"
    "sort"
    "strings"

    "github.com/elastic/otel-profiling-agent/debug/log"
    "github.com/elastic/otel-profiling-agent/libpf"
    "github.com/elastic/otel-profiling-agent/libpf/armhelpers"
    "github.com/elastic/otel-profiling-agent/libpf/remotememory"
    "github.com/elastic/otel-profiling-agent/support"

    aa "golang.org/x/arch/arm64/arm64asm"
)
// nextAligned aligns a pointer up, to the next multiple of align.
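// Note that an already-aligned pointer is advanced as well: with align = 64,
// both 0x1000 and 0x1001 map to 0x1040.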
func nextAligned(ptr libpf.Address, align uint64) libpf.Address {
    return (ptr + libpf.Address(align)) & ^(libpf.Address(align) - 1)
}
// StubRoutine marks a logical function within the StubRoutines blob.
type StubRoutine struct {
    name       string
    start, end libpf.Address
}
// findStubBounds heuristically determines the bounds of individual functions
// within the larger StubRoutines blobs. We receive pointers for most of the
// stubs from VM structs / JVMCI VM structs, but not the lengths.
//
// This function first collects all routines and sorts them by start address.
// The start of the next stub then serves as the upper bound for the end of
// the current one. This works well in most cases, but some functions are
// missing from VM structs (and would thus be merged into the previous stub),
// and the last function has nothing following it to serve as a boundary.
//
// To handle these edge cases, we additionally sweep for the NOP instructions
// that are used as padding between subroutines. One might be inclined to rely
// on the NOP heuristic alone, but it isn't sufficient either: the previous
// stub function might end exactly on an alignment boundary, so that the next
// one needs no padding. In some cases the JVM developers also simply omitted
// the padding. Combined, however, the two heuristics yield reliable results.
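//
// Illustration with hypothetical addresses (CodeAlign = 64): if a stub starts
// at 0x1000 and the next known stub starts at 0x1100, then 0x1100 is the
// upper bound for its end. If the NOP sweep finds padding in the 64-byte
// block ending at 0x10c0, the stub is shortened to end there instead.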
func findStubBounds(vmd *hotspotVMData, bias libpf.Address,
    rm remotememory.RemoteMemory) []StubRoutine {
    const CodeAlign = 64
    const MaxStubLen = 8 * 1024

    stubs := make([]StubRoutine, 0, 64)
    for field, addr := range vmd.vmStructs.StubRoutines.CatchAll {
        if strings.Contains(field, "_table_") {
            continue
        }

        // Not all stubs are generated for all architectures.
        entry := rm.Ptr(addr + bias)
        if entry == 0 {
            continue
        }

        stubs = append(stubs, StubRoutine{
            name:  strings.TrimPrefix(field, "_"),
            start: entry,
            end:   0, // filled in later
        })
    }

    sort.Slice(stubs, func(i, j int) bool {
        if stubs[i].start != stubs[j].start {
            return stubs[i].start < stubs[j].start
        }
        // Secondary ordering by name to ensure that we produce deterministic
        // results even in the presence of stub aliases (same start address).
        return stubs[i].name < stubs[j].name
    })

    filtered := make([]StubRoutine, 0, len(stubs))
    for i := 0; i < len(stubs); i++ {
        cur := &stubs[i]

        // Some stubs reuse the code of another stub. Skip elements until we
        // find the next stub that doesn't occupy the same address.
        for i < len(stubs) {
            if i != len(stubs)-1 {
                // The start of the next element bounds the maximum extent
                // of the current one.
                next := &stubs[i+1]
                cur.end = next.start
            } else {
                // Last element: assume the maximum length and let the
                // disassembler heuristic below deal with that case.
                cur.end = cur.start + MaxStubLen - 1
            }

            if cur.start == cur.end {
                i++
            } else {
                break
            }
        }

        // Sweep for the stub function boundary.
        heuristicEnd := libpf.Address(0)
    NopHeuristic:
        for p := nextAligned(cur.start, CodeAlign); p < cur.start+MaxStubLen; p += CodeAlign {
            const NopARM64 = 0xD503201F
            const NopAMD64 = 0x90

            block := make([]byte, CodeAlign)
            if err := rm.Read(p-CodeAlign, block); err != nil {
                continue
            }

            // The last function in each stub blob is followed by zeros.
            if libpf.SliceAllEqual(block, 0) {
                heuristicEnd = p
                break NopHeuristic
            }

            // Other functions are separated by NOPs.
            switch runtime.GOARCH {
            case "arm64": //nolint:goconst
                if binary.LittleEndian.Uint32(block[len(block)-4:]) == NopARM64 {
                    heuristicEnd = p
                    break NopHeuristic
                }
            case "amd64":
                if block[len(block)-1] == NopAMD64 {
                    heuristicEnd = p
                    break NopHeuristic
                }
            default:
                panic("unexpected architecture")
            }
        }

        // Pick the minimum of both heuristics as the length.
        if heuristicEnd != 0 {
            cur.end = min(cur.end, heuristicEnd)
        }

        if cur.end-cur.start > MaxStubLen {
            log.Debugf("Unable to determine length for JVM stub %s", cur.name)
            continue
        }

        filtered = append(filtered, *cur)
    }

    return filtered
}
// analyzeStubArm64 disassembles the first 16 instructions of an ARM64 stub in
// an attempt to detect whether it has a frame or needs an SP offset.
//
// Examples of cases currently handled by this function:
//
// Stack frame setup (checkcast_arraycopy_uninit):
//
//    >>> STP X29, X30, [SP,#-0x10]!
//    >>> MOV X29, SP
//
// Stack alloc without a frame via the mutating STP variant (sha256_implCompress):
//
//    >>> STP D8, D9, [SP,#-0x20]!
//
// Stack alloc with SUB after a few instructions (ghash_processBlocks_wide):
//
//    >>> CMP X3, #8
//    >>> B.LT loc_4600
//    >>> SUB SP, SP, #0x40
func analyzeStubArm64(rm remotememory.RemoteMemory, addr libpf.Address) (
    hasFrame bool, spOffs int64, err error) {
    code := make([]byte, 64)
    if err := rm.Read(addr, code); err != nil {
        return false, 0, err
    }

Outer:
    for offs := 0; offs < len(code); offs += 4 {
        insn, err := aa.Decode(code[offs : offs+4])
        if err != nil {
            return false, 0, fmt.Errorf("failed to decode instruction: %v", err)
        }

        const SP = aa.RegSP(aa.SP)
        switch insn.Op {
        case aa.STP:
            if insn.Args[0] == aa.X29 && insn.Args[1] == aa.X30 {
                // Assume this is a frame pointer setup.
                return true, 0, nil
            }
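
            // A pre- or post-indexed STP with SP writeback allocates stack
            // space as a side effect (e.g. `STP D8, D9, [SP,#-0x20]!` moves
            // SP down by 0x20), so accumulate the immediate into spOffs.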
            if arg, ok := insn.Args[2].(aa.MemImmediate); ok {
                if arg.Base != SP {
                    continue
                }
                if arg.Mode != aa.AddrPostIndex && arg.Mode != aa.AddrPreIndex {
                    continue
                }
                imm, ok := armhelpers.DecodeImmediate(arg)
                if !ok {
                    continue
                }
                spOffs += int64(imm)
            }
        case aa.SUB:
            for _, arg := range insn.Args[:2] {
                if arg, ok := arg.(aa.RegSP); !ok || arg != SP {
                    continue Outer
                }
            }
            imm, ok := armhelpers.DecodeImmediate(insn.Args[2])
            if !ok {
                continue
            }
            spOffs -= int64(imm)
        }
    }

    return false, spOffs, nil
}
// jitAreaForStubArm64 synthesizes a jitArea for an ARM64 stub routine.
//
// We currently make no attempt to generate extra areas for the prologues and
// epilogues of the functions, and (incorrectly) assume the SP delta to hold
// for the duration of the whole function. We expect sampling to hit the
// pro/epilogues rarely enough that further special-casing isn't worthwhile.
func jitAreaForStubArm64(stub *StubRoutine, heap *jitArea,
    rm remotememory.RemoteMemory) (jitArea, error) {
    var hasFrame bool
    var spOffs int64
    if stub.name == "call_stub_return_address" {
        // Special case: this is not an actual individual stub function, but
        // rather a pointer into the middle of the call stub.
        hasFrame = true
    } else {
        var err error
        hasFrame, spOffs, err = analyzeStubArm64(rm, stub.start)
        if err != nil {
            return jitArea{}, fmt.Errorf("failed to analyze stub: %v", err)
        }
    }
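
    // Pack the stub info into the trace-state ID: flag it as a stub, then
    // either record the presence of a frame or encode the scaled stack delta
    // (e.g. spOffs = -0x20 is stored as 0x20 / HSTSIDStackDeltaScale).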
    tsid := heap.tsid | 1<<support.HSTSIDIsStubBit
    if hasFrame {
        tsid |= 1 << support.HSTSIDHasFrameBit
    } else {
        sd := uint64(-spOffs) / support.HSTSIDStackDeltaScale
        tsid |= sd << support.HSTSIDStackDeltaBit
    }

    return jitArea{
        start:     stub.start,
        end:       stub.end,
        codeStart: heap.codeStart,
        tsid:      tsid,
    }, nil
}
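
// A minimal sketch of how these helpers compose (hypothetical caller; the
// real call sites live elsewhere in this package):
//
//    stubs := findStubBounds(vmd, bias, rm)
//    for i := range stubs {
//        if area, err := jitAreaForStubArm64(&stubs[i], heapArea, rm); err == nil {
//            // ... hand the area to the unwinder ...
//            _ = area
//        }
//    }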