@@ -11,6 +11,47 @@ struct pt_regs;
1111extern asmlinkage long __x64_sys_ni_syscall (const struct pt_regs * regs );
1212extern asmlinkage long __ia32_sys_ni_syscall (const struct pt_regs * regs );
1313
14+ /*
15+ * Instead of the generic __SYSCALL_DEFINEx() definition, the x86 version takes
16+ * struct pt_regs *regs as the only argument of the syscall stub(s) named as:
17+ * __x64_sys_*() - 64-bit native syscall
18+ * __ia32_sys_*() - 32-bit native syscall or common compat syscall
19+ * __ia32_compat_sys_*() - 32-bit compat syscall
20+ * __x32_compat_sys_*() - 64-bit X32 compat syscall
21+ *
22+ * The registers are decoded according to the ABI:
23+ * 64-bit: RDI, RSI, RDX, R10, R8, R9
24+ * 32-bit: EBX, ECX, EDX, ESI, EDI, EBP
25+ *
26+ * The stub then passes the decoded arguments to the __se_sys_*() wrapper to
27+ * perform sign-extension (omitted for zero-argument syscalls). Finally the
28+ * arguments are passed to the __do_sys_*() function which is the actual
29+ * syscall. These wrappers are marked as inline so the compiler can optimize
30+ * the functions where appropriate.
31+ *
32+ * Example assembly (re-ordered for readability; %rdi is really loaded last):
33+ *
34+ * <__x64_sys_recv>: <-- syscall with 4 parameters
35+ * callq <__fentry__>
36+ *
37+ * mov 0x70(%rdi),%rdi <-- decode regs->di
38+ * mov 0x68(%rdi),%rsi <-- decode regs->si
39+ * mov 0x60(%rdi),%rdx <-- decode regs->dx
40+ * mov 0x38(%rdi),%rcx <-- decode regs->r10
41+ *
42+ * xor %r9d,%r9d <-- clear %r9
43+ * xor %r8d,%r8d <-- clear %r8
44+ *
45+ * callq __sys_recvfrom <-- do the actual work in __sys_recvfrom()
46+ * which takes 6 arguments
47+ *
48+ * cltq <-- extend return value to 64-bit
49+ * retq <-- return
50+ *
51+ * This approach avoids leaking random user-provided register content down
52+ * the call chain.
53+ */
54+
1455/* Mapping of registers to parameters for syscalls on x86-64 and x32 */
1556#define SC_X86_64_REGS_TO_ARGS (x , ...) \
1657 __MAP(x,__SC_ARGS \
@@ -68,6 +109,26 @@ extern asmlinkage long __ia32_sys_ni_syscall(const struct pt_regs *regs);
68109#define __X64_SYS_NI (name )
69110#endif /* CONFIG_X86_64 */
70111
112+ #if defined(CONFIG_X86_32 ) || defined(CONFIG_IA32_EMULATION ) /* ia32-prefixed stubs are needed by either configuration, so they are defined once here */
113+ #define __IA32_SYS_STUB0 (name ) \
114+ __SYS_STUB0(ia32, sys_##name) /* zero-argument syscall: no register decoding is requested */
115+
116+ #define __IA32_SYS_STUBx (x , name , ...) \
117+ __SYS_STUBx(ia32, sys##name, \
118+ SC_IA32_REGS_TO_ARGS(x, __VA_ARGS__)) /* NOTE(review): sys##name has no '_' here; presumably name already carries a leading underscore, as in __se_sys##name - confirm against callers */
119+
120+ #define __IA32_COND_SYSCALL (name ) \
121+ __COND_SYSCALL(ia32, sys_##name)
122+
123+ #define __IA32_SYS_NI (name ) \
124+ __SYS_NI(ia32, sys_##name)
125+ #else /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */
126+ #define __IA32_SYS_STUB0 (name ) /* neither option set: all four __IA32_* helpers expand to nothing */
127+ #define __IA32_SYS_STUBx (x , name , ...)
128+ #define __IA32_COND_SYSCALL (name )
129+ #define __IA32_SYS_NI (name )
130+ #endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */
131+
71132#ifdef CONFIG_IA32_EMULATION
72133/*
73134 * For IA32 emulation, we need to handle "compat" syscalls *and* create
@@ -90,27 +151,11 @@ extern asmlinkage long __ia32_sys_ni_syscall(const struct pt_regs *regs);
90151#define __IA32_COMPAT_SYS_NI (name ) \
91152 __SYS_NI(ia32, compat_sys_##name)
92153
93- #define __IA32_SYS_STUB0 (name ) \
94- __SYS_STUB0(ia32, sys_##name)
95-
96- #define __IA32_SYS_STUBx (x , name , ...) \
97- __SYS_STUBx(ia32, sys##name, \
98- SC_IA32_REGS_TO_ARGS(x, __VA_ARGS__))
99-
100- #define __IA32_COND_SYSCALL (name ) \
101- __COND_SYSCALL(ia32, sys_##name)
102-
103- #define __IA32_SYS_NI (name ) \
104- __SYS_NI(ia32, sys_##name)
105154#else /* CONFIG_IA32_EMULATION */
106155#define __IA32_COMPAT_SYS_STUB0 (name )
107156#define __IA32_COMPAT_SYS_STUBx (x , name , ...)
108157#define __IA32_COMPAT_COND_SYSCALL (name )
109158#define __IA32_COMPAT_SYS_NI (name )
110- #define __IA32_SYS_STUB0 (name )
111- #define __IA32_SYS_STUBx (x , name , ...)
112- #define __IA32_COND_SYSCALL (name )
113- #define __IA32_SYS_NI (name )
114159#endif /* CONFIG_IA32_EMULATION */
115160
116161
@@ -180,40 +225,6 @@ extern asmlinkage long __ia32_sys_ni_syscall(const struct pt_regs *regs);
180225
181226#endif /* CONFIG_COMPAT */
182227
183-
184- /*
185- * Instead of the generic __SYSCALL_DEFINEx() definition, this macro takes
186- * struct pt_regs *regs as the only argument of the syscall stub named
187- * __x64_sys_*(). It decodes just the registers it needs and passes them on to
188- * the __se_sys_*() wrapper performing sign extension and then to the
189- * __do_sys_*() function doing the actual job. These wrappers and functions
190- * are inlined (at least in very most cases), meaning that the assembly looks
191- * as follows (slightly re-ordered for better readability):
192- *
193- * <__x64_sys_recv>: <-- syscall with 4 parameters
194- * callq <__fentry__>
195- *
196- * mov 0x70(%rdi),%rdi <-- decode regs->di
197- * mov 0x68(%rdi),%rsi <-- decode regs->si
198- * mov 0x60(%rdi),%rdx <-- decode regs->dx
199- * mov 0x38(%rdi),%rcx <-- decode regs->r10
200- *
201- * xor %r9d,%r9d <-- clear %r9
202- * xor %r8d,%r8d <-- clear %r8
203- *
204- * callq __sys_recvfrom <-- do the actual work in __sys_recvfrom()
205- * which takes 6 arguments
206- *
207- * cltq <-- extend return value to 64-bit
208- * retq <-- return
209- *
210- * This approach avoids leaking random user-provided register content down
211- * the call chain.
212- *
213- * If IA32_EMULATION is enabled, this macro generates an additional wrapper
214- * named __ia32_sys_*() which decodes the struct pt_regs *regs according
215- * to the i386 calling convention (bx, cx, dx, si, di, bp).
216- */
217228#define __SYSCALL_DEFINEx (x , name , ...) \
218229 static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
219230 static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
0 commit comments