-
Notifications
You must be signed in to change notification settings - Fork 210
/
Copy pathsyscallas.S
274 lines (234 loc) · 8.21 KB
/
syscallas.S
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
/* SPDX-License-Identifier: LGPL-3.0-or-later */
/* Copyright (C) 2020 Intel Corporation
* Borys Popławski <borysp@invisiblethingslab.com>
*/
/*
* This file contains the entry point of the system call table in the library OS (the function
* syscalldb()).
*
* The entry point implementation below first saves the CPU context of the current application
* thread on the thread's LibOS stack, then calls the LibOS syscall-emulation function. That function
* does not return to the entry point; once it is done, it calls the context-restoring function,
* which passes control back to the application.
* The context consists of the GPRs, the FP control word (fpcw) and the SSE/AVX/... control word
* (mxcsr).
*
* Note that LibOS may clobber all FP/SSE/AVX/... (extended) state except the control words. We rely
* on the fact that applications do *not* assume that this extended state is preserved across system
* calls. Indeed, the extended state (bar control words) is explicitly described as *not* preserved
* by the System V ABI, and though syscall ABI is not the same as System V ABI, we assume that no
* sane application issues syscalls in a non-System-V compliant manner. See System V ABI docs
* (https://uclibc.org/docs/psABI-x86_64.pdf), "Register Usage" for more information.
*/
#include "asm-offsets.h"
.extern shim_emulate_syscall
.extern shim_xstate_size
.extern shim_xstate_restore
# syscalldb - entry point through which the application enters the LibOS syscall emulation.
#
# In:    rcx = return address (the instruction following the syscall); every other register may
#        hold an arbitrary application value, and rflags is still the application value.
# Does:  switches to the LibOS stack of the current thread (read from the TCB through gs), builds
#        a PAL_CONTEXT there (GPRs, rsp, rip, rflags, csgsfsss, zeroed err/trapno/oldmask/cr2),
#        stores mxcsr and the FP control word into it, reserves an aligned area of
#        shim_xstate_size() bytes whose address is recorded in the fpregs field, and finally calls
#        shim_emulate_syscall with rdi = pointer to the PAL_CONTEXT.
# Out:   control never comes back here (see the note next to the final call).
#
# The frame is built with pushes from the highest field down to r8, so the instruction order
# below mirrors the PAL_CONTEXT layout in reverse.
.global syscalldb
.type syscalldb, @function
syscalldb:
# On entry to this function rcx contains the return address (next instruction after syscall),
# all other registers can have arbitrary values.
# We have to be very careful with executed instructions not to change any flags until they
# are saved! Only mov/push/pushfq are used before that point, none of which modify rflags.
.cfi_startproc
.cfi_def_cfa %rsp, 0
.cfi_register %rip, %rcx
# We can clobber r11 as it will be set to rflags later on.
# Until then r11 carries the application stack pointer, and the CFA is tracked through it because
# rsp is about to be replaced with the LibOS stack pointer.
mov %rsp, %r11
.cfi_undefined %r11
.cfi_register %rsp, %r11
.cfi_def_cfa_register %r11
# Switch to the LibOS stack of this thread; its address is kept in the TCB, reachable through gs.
mov %gs:(SHIM_TCB_OFF + SHIM_TCB_LIBOS_STACK_OFF), %rsp
# Create PAL_CONTEXT struct on the stack.
# reserve space for mxcsr + fpcw + is_fpregs_used
pushq $0
# fpregs, but for now we use this to store rax - to get a scratch register
push %rax
# err + trapno + oldmask + cr2 are cleared for a syscall frame
# (zeroing is done with mov rather than xor, because xor would modify the not-yet-saved flags)
mov $0, %eax
push %rax
push %rax
push %rax
push %rax
# csgsfsss - default value, as we do not support changing it
mov $(0x2b << 48 | 0x33), %rax
push %rax
# after this we can use instructions changing flags
pushfq
# Debuggers use Trap Flag (TF) of EFLAGS to do single-stepping - otherwise it is unused by normal
# applications. If the previous instruction was single-stepped, it stored TF, so reset it here.
# The mask 0x100 selects TF in the rflags image that was just pushed.
andq $~0x100, (%rsp)
# Set default rflags value (just IF set).
pushq $0x202
popfq
push %rcx # rip
push %r11 # rsp
# Ten 8-byte slots (0x50 bytes) have been pushed, so the CFA is now the initial LibOS stack
# pointer; the application rsp and rip are stored in the two lowest slots.
.cfi_def_cfa %rsp, 0x50
.cfi_rel_offset %rsp, 0
.cfi_rel_offset %rip, 8
# Set r11 to rflags
# (the image saved above, TF already cleared; r11 is pushed into its own slot further down)
mov 0x10(%rsp), %r11
# rcx slot: rcx still holds the return address at this point.
push %rcx
.cfi_adjust_cfa_offset 8
.cfi_rel_offset %rcx, 0
# rax was saved in fpregs, save it in proper place now, fpregs will be populated later
# (0x48(%rsp) is the fpregs slot, nine slots above the current top of the stack)
pushq 0x48(%rsp)
.cfi_adjust_cfa_offset 8
.cfi_rel_offset %rax, 0
push %rdx
.cfi_adjust_cfa_offset 8
.cfi_rel_offset %rdx, 0
push %rbx
.cfi_adjust_cfa_offset 8
.cfi_rel_offset %rbx, 0
push %rbp
.cfi_adjust_cfa_offset 8
.cfi_rel_offset %rbp, 0
push %rsi
.cfi_adjust_cfa_offset 8
.cfi_rel_offset %rsi, 0
push %rdi
.cfi_adjust_cfa_offset 8
.cfi_rel_offset %rdi, 0
push %r15
.cfi_adjust_cfa_offset 8
.cfi_rel_offset %r15, 0
push %r14
.cfi_adjust_cfa_offset 8
.cfi_rel_offset %r14, 0
push %r13
.cfi_adjust_cfa_offset 8
.cfi_rel_offset %r13, 0
push %r12
.cfi_adjust_cfa_offset 8
.cfi_rel_offset %r12, 0
push %r11
.cfi_adjust_cfa_offset 8
.cfi_rel_offset %r11, 0
push %r10
.cfi_adjust_cfa_offset 8
.cfi_rel_offset %r10, 0
push %r9
.cfi_adjust_cfa_offset 8
.cfi_rel_offset %r9, 0
push %r8
.cfi_adjust_cfa_offset 8
.cfi_rel_offset %r8, 0
# PAL_CONTEXT struct ends here.
# r15 = pointer to the finished PAL_CONTEXT (its application value is already in the frame).
# r15 is callee-saved in the System V ABI, so it survives the calls below; the CFA is tracked
# through it from here on, because rsp gets realigned and moved.
mov %rsp, %r15
.cfi_def_cfa_register %r15
and $~0xF, %rsp # Required by System V AMD64 ABI.
# save FP Control Word & MXCSR into the PAL_CONTEXT (not the TCB: both stores are r15-relative)
stmxcsr PAL_CONTEXT_MXCSR_OFF(%r15)
fnstcw PAL_CONTEXT_FPCW_OFF(%r15)
# fpregs is not populated, so is_fpregs_used should be 0.
movb $0, PAL_CONTEXT_FPREGS_USED_OFF(%r15)
# rax = value returned by shim_xstate_size, used as the byte size of the area carved out below.
call shim_xstate_size
sub %rax, %rsp # allocate space for xstate
and $~(SHIM_XSTATE_ALIGN - 1), %rsp
# Record the address of the (still unpopulated) area in the fpregs field of the context.
mov %rsp, PAL_CONTEXT_FPREGS_OFF(%r15)
and $~0xF, %rsp # Required by System V AMD64 ABI.
# NOTE(review): rbp is zeroed before entering C code, presumably to terminate frame-pointer
# chains at this frame - confirm. The application rbp is already saved in the context.
xor %ebp, %ebp
#ifdef DEBUG
# Pretend that this function (`syscalldb`) is called from somewhere inside a function called
# `__morestack`. This is the only way to have a backtrace in GDB spanning from LibOS/Pal,
# through `syscalldb` to the user application code/libc, because GDB does not handle switching
# stacks in the middle of backtrace, unless the function doing it is called `__morestack`.
# Thanks GDB!
# Technical details: we load an address somewhere inside `__morestack` (this cannot be the first
# instruction in there) into r14 (a callee-saved register) and mark it as holding old rip. This
# way GDB thinks `syscalldb` was called by `__morestack`. Inside `__morestack` we mark all
# registers as having the same value as in the previous frame (basically a no-op frame). Now GDB
# sees a backtrace: `user_function` -> `__morestack` -> `syscalldb`, with `__morestack` having
# the same stack value as `user_function` and `syscalldb` having the new stack value. This makes
# GDB happy and it prints correct backtrace across all these functions, which is what we are
# after with all this madness.
lea Lmorestack_for_gdb_bt(%rip), %r14
.cfi_register %rip, %r14
#endif
# rdi = first argument: pointer to the PAL_CONTEXT built above.
mov %r15, %rdi
call shim_emulate_syscall # this does not return
# Just to make return address point inside this function.
ud2
.cfi_endproc
.size syscalldb, .-syscalldb
#ifdef DEBUG
/*
 * __morestack - debug-only dummy frame; it is never meant to be executed.
 *
 * syscalldb loads the address of Lmorestack_for_gdb_bt into r14 and declares it to be its return
 * address, so that GDB sees syscalldb as called from inside a function named `__morestack` and
 * accepts the stack switch when printing a backtrace. The unwind information below describes a
 * frame that changes nothing: rip is found in rcx and every general purpose register, rsp
 * included, has the same value as in the previous frame.
 */
.global __morestack
.type __morestack, @function
__morestack:
    .cfi_startproc
    .cfi_register %rip, %rcx
    /* One .cfi_same_value per register, emitted in PAL_CONTEXT order. */
    .irp reg, r8, r9, r10, r11, r12, r13, r14, r15, rdi, rsi, rbp, rbx, rdx, rax, rcx, rsp
    .cfi_same_value %\reg
    .endr
    /* Padding: the address handed to GDB must not be the first instruction of the function. */
    nop
Lmorestack_for_gdb_bt:
    nop
    .cfi_endproc
.size __morestack, .-__morestack
#endif
/*
 * _return_from_syscall - restore an application context and resume the application.
 *
 * ABI:   System V AMD64
 * In:    rdi = pointer to PAL_CONTEXT
 * Out:   never returns to its caller; execution continues at the rip stored in the context, with
 *        all GPRs, rsp and rflags loaded from the context.
 * Uses:  the SHIM_TCB_SCRATCH_PC_OFF slot of the TCB (through gs) to hold the jump target, since
 *        no register is free once the application registers have been reloaded.
 *
 * When the is_fpregs_used byte of the context is non-zero, shim_xstate_restore is called with the
 * fpregs pointer of the context; otherwise only the FP control word and MXCSR are reloaded.
 */
.global _return_from_syscall
.type _return_from_syscall, @function
_return_from_syscall:
    .cfi_startproc

    # rbx = pointer to PAL_CONTEXT; rbx is callee-saved, so it survives the call made on the
    # .Lrestore_xstate path.
    mov %rdi, %rbx

    movb PAL_CONTEXT_FPREGS_USED_OFF(%rbx), %al
    test %al, %al
    jne .Lrestore_xstate

    # restore FP Control Word & MXCSR from the PAL_CONTEXT
    fldcw PAL_CONTEXT_FPCW_OFF(%rbx)
    ldmxcsr PAL_CONTEXT_MXCSR_OFF(%rbx)

.Lrestore_context:
    # After this line cfi will be broken, but we do not care much since this does not call
    # anything and just restores the user context, so it will not be visible in any backtrace.
    # Note that fixing it is not trivial - we would need the trick with `__morestack`, but we have
    # neither a stack nor a scratch register.
    mov %rbx, %rsp

    # rsp now walks the PAL_CONTEXT upwards, starting at the r8 field.
    pop %r8
    pop %r9
    pop %r10
    pop %r11
    pop %r12
    pop %r13
    pop %r14
    pop %r15
    pop %rdi
    pop %rsi
    pop %rbp
    pop %rbx
    pop %rdx

    # Remaining fields, bottom up: rax, rcx, rsp, rip, rflags. They are permuted in place into
    # rax, rip, rflags, rcx, rsp, so that rsp can be popped last. rax and rcx serve as scratch
    # registers here, as their final values are popped afterwards.

    # exchange rcx with rip
    mov 0x8(%rsp), %rcx
    mov 0x18(%rsp), %rax
    mov %rcx, 0x18(%rsp)
    mov %rax, 0x8(%rsp)

    # exchange rsp with flags
    mov 0x10(%rsp), %rcx
    mov 0x20(%rsp), %rax
    mov %rcx, 0x20(%rsp)
    mov %rax, 0x10(%rsp)

    pop %rax
    pop %rcx # rip
    # From popfq on, only mov/pop/jmp are executed, none of which modify the restored flags.
    popfq
    # Park the target rip in the TCB, so that rcx can take its application value.
    mov %rcx, %gs:(SHIM_TCB_OFF + SHIM_TCB_SCRATCH_PC_OFF)
    pop %rcx
    pop %rsp
    jmp *%gs:(SHIM_TCB_OFF + SHIM_TCB_SCRATCH_PC_OFF)

.Lrestore_xstate:
    # The System V AMD64 ABI requires rsp to be 16-byte aligned at every call site, but rsp here
    # is whatever the caller left (entry through a `call` leaves rsp % 16 == 8). The current stack
    # is abandoned at .Lrestore_context anyway, so simply align rsp down. r12 keeps the unaligned
    # value as the CFA register, so unwinding still works during the call; r12 is callee-saved and
    # gets its application value back from the context afterwards.
    mov %rsp, %r12
    .cfi_def_cfa_register %r12
    and $~0xF, %rsp # Required by System V AMD64 ABI.

    mov PAL_CONTEXT_FPREGS_OFF(%rbx), %rdi
    call shim_xstate_restore
    jmp .Lrestore_context
    .cfi_endproc
.size _return_from_syscall, .-_return_from_syscall