Skip to content

Commit

Permalink
[LibOS] Add sys.mock_syscalls = [ ... ] manifest option
Browse files Browse the repository at this point in the history
This commit adds the manifest syntax `sys.mock_syscalls = [ ... ]`
to specify system calls that will be mocked when executed in Gramine
(i.e. return a specified value without any other side effects).

This may be particularly important for cases where the overhead of
invoking a system call on the host (e.g. exiting the SGX enclave)
becomes a performance bottleneck, and it is more beneficial to disable
or no-op the syscall in the first place; `sched_yield()` is an example.

Another example may be disabling certain functionalities for security
reasons. For example, one may want to disable `eventfd()` and
`eventfd2()` to forbid creation of eventfd objects.

Yet another example may be mocking syscalls currently not implemented in
Gramine. E.g. it may be enough to mock `vhangup()` to always return 0,
so that the workload proceeds further.

Signed-off-by: Dmitrii Kuvaiskii <dmitrii.kuvaiskii@intel.com>
  • Loading branch information
dimakuv committed May 16, 2024
1 parent 25056d0 commit 4a5ce3e
Show file tree
Hide file tree
Showing 14 changed files with 274 additions and 5 deletions.
42 changes: 42 additions & 0 deletions Documentation/manifest-syntax.rst
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,48 @@ Python). Could be useful in SGX environments: child processes consume
to achieve this, you need to run the whole Gramine inside a proper security
sandbox.
Mocking syscalls
^^^^^^^^^^^^^^^^

::

sys.mock_syscalls = [
{ name = "syscall_name1", return = 0 }, # no-op syscall
{ name = "syscall_name2", return = -38 }, # denied syscall (ENOSYS)
]

This syntax specifies the system calls that are mocked when executed in
Gramine (i.e. they return a specified value without any other side effects).
If the ``return`` field is omitted, the return value defaults to ``0`` (no-op).

For example, to skip ``sched_yield`` syscall, specify::

sys.mock_syscalls = [
{ name = "sched_yield", return = 0 },
]

As another example, to disallow eventfd completely, specify::

sys.mock_syscalls = [
{ name = "eventfd", return = -38 },
{ name = "eventfd2", return = -38 },
]


.. note ::
This option is *not* a replacement for ``sys.disallow_subprocesses`` (see
above). This is because the ``clone()`` syscall has two usages: (1) it is
used to spawn subprocesses by Glibc and many other libraries and runtimes and
(2) it is also used to create threads in the same process. The
``sys.disallow_subprocesses`` manifest option disables only the first usage,
whereas ``sys.mock_syscalls = [ { name = "clone", ... } ]`` disables both usages.
.. note ::
This option is *not* a security feature. Its rationale is improving
performance (the example of ``sched_yield``), mocking syscalls currently not
implemented in Gramine, and limiting syscalls exposed to the app.
Root FS mount point
^^^^^^^^^^^^^^^^^^^

Expand Down
3 changes: 3 additions & 0 deletions libos/include/libos_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,11 @@ extern bool g_eventfd_passthrough_mode;
int init_eventfd_mode(void);

void warn_unsupported_syscall(unsigned long sysno);
/* Logs (at trace level) that syscall `sysno` was mocked, together with its mocked return value. */
void trace_mock_syscall(unsigned long sysno);
void debug_print_syscall_before(unsigned long sysno, ...);
void debug_print_syscall_after(unsigned long sysno, ...);
/* Looks up syscall `name` in the syscall parser table; on success returns 0 and stores the syscall
 * number in `*out_sysno`, otherwise returns -ENOSYS. */
int get_syscall_number(const char* name, unsigned long* out_sysno);
/* Reads the `sys.mock_syscalls` manifest array and fills the table of mocked syscalls; returns 0 on
 * success (including when the option is absent) and a negative error code otherwise. */
int init_syscalls(void);

#ifndef __alloca
#define __alloca __builtin_alloca
Expand Down
7 changes: 6 additions & 1 deletion libos/include/libos_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,14 @@
#include "linux_abi/sysinfo.h"

typedef void (*libos_syscall_t)(void);

extern libos_syscall_t libos_syscall_table[];

/* Mock configuration of a single syscall; one entry per syscall number lives in
 * `libos_mock_syscall_table`. */
struct libos_mock_syscall {
/* if true, the syscall is not dispatched to its implementation; `return_value` is returned */
bool is_mocked;
/* value handed back to the application when `is_mocked` is true */
long return_value;
};
/* indexed by syscall number */
extern struct libos_mock_syscall libos_mock_syscall_table[];

/* syscall implementation */
long libos_syscall_read(int fd, void* buf, size_t count);
long libos_syscall_write(int fd, const void* buf, size_t count);
Expand Down
3 changes: 3 additions & 0 deletions libos/src/arch/x86_64/libos_table.c
Original file line number Diff line number Diff line change
Expand Up @@ -377,3 +377,6 @@ libos_syscall_t libos_syscall_table[LIBOS_SYSCALL_BOUND] = {
[__NR_futex_waitv] = (libos_syscall_t)0, // libos_syscall_futex_waitv
[__NR_set_mempolicy_home_node] = (libos_syscall_t)0, // libos_syscall_set_mempolicy_home_node
};

/* Table of mocked syscalls, indexed by syscall number. By default, all syscalls have
 * `is_mocked = false` and `return_value = 0`; entries are updated by `init_syscalls()` based on the
 * `sys.mock_syscalls` manifest option. */
struct libos_mock_syscall libos_mock_syscall_table[LIBOS_SYSCALL_BOUND] = { 0 };
1 change: 1 addition & 0 deletions libos/src/libos_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,7 @@ noreturn void libos_init(const char* const* argv, const char* const* envp) {
strlen(g_pal_public_state->dns_host.hostname));

RUN_INIT(init_eventfd_mode);
RUN_INIT(init_syscalls);

log_debug("LibOS initialized");

Expand Down
20 changes: 20 additions & 0 deletions libos/src/libos_parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -1649,6 +1649,26 @@ void warn_unsupported_syscall(unsigned long sysno) {
log_warning("Unsupported system call %lu", sysno);
}

/* Emits a trace-level log line for a mocked syscall: the syscall's name (from the parser table) and
 * the return value configured for it in the mock table. */
void trace_mock_syscall(unsigned long sysno) {
    const char* mocked_name = syscall_parser_table[sysno].name;
    long mocked_ret = libos_mock_syscall_table[sysno].return_value;

    log_trace("%s(...) = %ld (mock)", mocked_name, mocked_ret);
}

/*
 * Translates a syscall name into its number by scanning the syscall parser table.
 *
 * On a match, stores the table index in `*out_sysno` and returns 0. Slots without a name are
 * skipped. Returns -ENOSYS if no slot carries the requested name.
 */
int get_syscall_number(const char* name, unsigned long* out_sysno) {
    static_assert(LIBOS_SYSCALL_BOUND == ARRAY_SIZE(syscall_parser_table), "oops");
    assert(out_sysno);

    size_t idx = 0;
    while (idx < LIBOS_SYSCALL_BOUND) {
        const char* candidate = syscall_parser_table[idx].name;
        if (candidate && !strcmp(name, candidate)) {
            *out_sysno = idx;
            return 0;
        }
        idx++;
    }

    return -ENOSYS;
}

static int buf_write_all(const char* str, size_t size, void* arg) {
__UNUSED(arg);

Expand Down
80 changes: 79 additions & 1 deletion libos/src/libos_syscalls.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
#include "libos_table.h"
#include "libos_tcb.h"
#include "libos_thread.h"
#include "libos_utils.h"
#include "linux_abi/errors.h"
#include "toml_utils.h"

typedef arch_syscall_arg_t (*six_args_syscall_t)(arch_syscall_arg_t, arch_syscall_arg_t,
arch_syscall_arg_t, arch_syscall_arg_t,
Expand All @@ -31,7 +33,19 @@ noreturn void libos_emulate_syscall(PAL_CONTEXT* context) {
unsigned long args[] = { ALL_SYSCALL_ARGS(context) };
ret = handle_libos_call(args[0], args[1], args[2]);
} else {
if (sysnr >= LIBOS_SYSCALL_BOUND || !libos_syscall_table[sysnr]) {
if (sysnr >= LIBOS_SYSCALL_BOUND) {
warn_unsupported_syscall(sysnr);
ret = -ENOSYS;
goto out;
}

if (libos_mock_syscall_table[sysnr].is_mocked) {
trace_mock_syscall(sysnr);
ret = libos_mock_syscall_table[sysnr].return_value;
goto out;
}

if (!libos_syscall_table[sysnr]) {
warn_unsupported_syscall(sysnr);
ret = -ENOSYS;
goto out;
Expand Down Expand Up @@ -84,3 +98,67 @@ noreturn void return_from_syscall(PAL_CONTEXT* context) {
#endif
_return_from_syscall(context);
}

/*
 * Parses the optional `sys.mock_syscalls` manifest array and marks every listed syscall as mocked
 * in `libos_mock_syscall_table`.
 *
 * Each array element must be a TOML table with a string `name` and an optional integer `return`
 * (defaults to 0). Returns 0 on success, including when `sys` or `sys.mock_syscalls` is absent or
 * the array is empty; returns a negative error code if an entry is malformed or names an unknown
 * syscall.
 */
int init_syscalls(void) {
    assert(g_manifest_root);
    int ret;

    toml_table_t* manifest_sys = toml_table_in(g_manifest_root, "sys");
    if (!manifest_sys)
        return 0;

    toml_array_t* toml_mock_syscalls = toml_array_in(manifest_sys, "mock_syscalls");
    if (!toml_mock_syscalls)
        return 0;

    ssize_t toml_mock_syscalls_cnt = toml_array_nelem(toml_mock_syscalls);
    if (toml_mock_syscalls_cnt < 0)
        return -EPERM;
    if (toml_mock_syscalls_cnt == 0)
        return 0;

    /* owned by this function; freed after each iteration and on every exit path via `out` */
    char* syscall_name = NULL;

    for (ssize_t i = 0; i < toml_mock_syscalls_cnt; i++) {
        toml_table_t* toml_mock_syscall = toml_table_at(toml_mock_syscalls, i);
        if (!toml_mock_syscall) {
            log_error("Invalid mock syscall in manifest at index %ld (not a TOML table)", i);
            ret = -EINVAL;
            goto out;
        }

        ret = toml_string_in(toml_mock_syscall, "name", &syscall_name);
        if (ret < 0) {
            log_error("Invalid mock syscall in manifest at index %ld (can't parse `name`)", i);
            ret = -EINVAL;
            goto out;
        }

        /* guard against a NULL name (e.g. an entry without the `name` key) before it reaches
         * strcmp() in get_syscall_number() or the `%s` in the error message below */
        if (!syscall_name) {
            log_error("Invalid mock syscall in manifest at index %ld (no `name`)", i);
            ret = -EINVAL;
            goto out;
        }

        int64_t syscall_return;
        ret = toml_int_in(toml_mock_syscall, "return", /*defaultval=*/0, &syscall_return);
        if (ret < 0) {
            log_error("Invalid mock syscall in manifest at index %ld (can't parse `return`)", i);
            ret = -EINVAL;
            goto out;
        }

        /* same type as the `out_sysno` parameter of get_syscall_number() */
        unsigned long sysno;
        ret = get_syscall_number(syscall_name, &sysno);
        if (ret < 0) {
            log_error("Unrecognized mock syscall `%s` in manifest at index %ld", syscall_name, i);
            goto out;
        }

        /* add syscall to the table of mocked syscalls */
        libos_mock_syscall_table[sysno].is_mocked = true;
        libos_mock_syscall_table[sysno].return_value = syscall_return;

        free(syscall_name);
        syscall_name = NULL;
    }

    ret = 0;
out:
    free(syscall_name);
    return ret;
}
1 change: 1 addition & 0 deletions libos/test/regression/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ tests = {
'mmap_file': {},
'mmap_file_backed': {},
'mmap_file_emulated': {},
'mock_syscalls': {},
'mprotect_file_fork': {},
'mprotect_prot_growsdown': {},
'multi_pthread': {},
Expand Down
60 changes: 60 additions & 0 deletions libos/test/regression/mock_syscalls.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/* SPDX-License-Identifier: LGPL-3.0-or-later */
/* Copyright (C) 2024 Intel Corporation
* Dmitrii Kuvaiskii <dmitrii.kuvaiskii@intel.com>
*/

#define _GNU_SOURCE
#include <err.h>
#include <errno.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/eventfd.h>
#include <sys/types.h>
#include <unistd.h>

/*
 * Regression test for the `sys.mock_syscalls` manifest option. The accompanying manifest mocks
 * eventfd/eventfd2 and fork/vfork/clone/clone3 to return -ENOSYS, sched_yield to return 0 and
 * vhangup to return 123; getpid/getppid are not mocked and must keep working.
 *
 * Prints "TEST OK" and returns 0 on success; exits with status 1 via errx() on the first mismatch.
 */
int main(void) {
    int ret;

    /* a syscall mocked with -ENOSYS must both return -1 *and* set errno to ENOSYS, so the test
     * fails if either of the two conditions is violated */
    errno = 0;
    ret = eventfd(0, 0);
    if (ret != -1 || errno != ENOSYS)
        errx(1, "expected eventfd to fail with -ENOSYS but it returned ret=%d errno=%d", ret,
             errno);

    errno = 0;
    ret = fork();
    if (ret != -1 || errno != ENOSYS)
        errx(1, "expected fork to fail with -ENOSYS but it returned ret=%d errno=%d", ret, errno);

    /* syscalls that are not listed in the manifest must be unaffected by mocking */
    errno = 0;
    ret = getpid();
    if (ret < 0)
        errx(1, "expected getpid to succeed but it returned ret=%d errno=%d", ret, errno);

    errno = 0;
    ret = getppid();
    if (ret < 0)
        errx(1, "expected getppid to succeed but it returned ret=%d errno=%d", ret, errno);

    /* sched_yield must *not* appear in strace on the host; this case is added for manual testing */
    for (int i = 0; i < 100; i++) {
        errno = 0;
        ret = sched_yield();
        if (ret < 0) {
            errx(1, "expected sched_yield to succeed (no-op) but it returned ret=%d errno=%d",
                 ret, errno);
        }
    }

    /* vhangup was chosen as a syscall that will most certainly not be implemented in Gramine */
    errno = 0;
    ret = vhangup();
    if (ret != 123)
        errx(1, "expected vhangup to succeed (as a no-op, with dummy return value 123) but it "
             "returned ret=%d errno=%d", ret, errno);

    puts("TEST OK");
    return 0;
}
41 changes: 41 additions & 0 deletions libos/test/regression/mock_syscalls.manifest.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
loader.entrypoint = "file:{{ gramine.libos }}"
libos.entrypoint = "{{ entrypoint }}"

loader.log_level = "trace"

loader.env.LD_LIBRARY_PATH = "/lib"

fs.mounts = [
{ path = "/lib", uri = "file:{{ gramine.runtimedir(libc) }}" },
{ path = "/{{ entrypoint }}", uri = "file:{{ binary_dir }}/{{ entrypoint }}" },
]

sys.mock_syscalls = [
# sched_yield is mocked as no-op (`return = 0` by default), sometimes useful for performance;
# this no-op behavior should be evident from strace on the host
{ name = "sched_yield" },

# vhangup is not implemented in Gramine but here mocked as no-op with a dummy return value
{ name = "vhangup", return = 123 },

# even though glibc wrapper is called eventfd, glibc translates it into eventfd2;
# we specify both syscall variants to be on the safe side
{ name = "eventfd", return = -38 },
{ name = "eventfd2", return = -38 },

# even though glibc wrapper is called fork, glibc translates it into clone; at the same time, musl
# uses fork syscall; we specify all syscall variants to be on the safe side
{ name = "fork", return = -38 },
{ name = "vfork", return = -38 },
{ name = "clone", return = -38 },
{ name = "clone3", return = -38 },
]

sgx.debug = true
sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }}

sgx.trusted_files = [
"file:{{ gramine.libos }}",
"file:{{ gramine.runtimedir(libc) }}/",
"file:{{ binary_dir }}/{{ entrypoint }}",
]
11 changes: 11 additions & 0 deletions libos/test/regression/test_libos.py
Original file line number Diff line number Diff line change
Expand Up @@ -1030,6 +1030,17 @@ def test_010_syscall_restart(self):
self.assertIn('Got: P', stdout)
self.assertIn('TEST 2 OK', stdout)

def test_020_mock_syscalls(self):
    # Runs the `mock_syscalls` binary and checks that every mocked syscall shows up in the trace
    # log (stderr) with its configured return value, and that the binary itself reports success.
    stdout, stderr = self.run_binary(['mock_syscalls'])

    # the process-creation syscall seen in the trace depends on the libc in use
    spawn_syscall = 'fork' if USES_MUSL else 'clone'
    expected_traces = (
        'eventfd2(...) = -38 (mock)',
        f'{spawn_syscall}(...) = -38 (mock)',
        'sched_yield(...) = 0 (mock)',
        'vhangup(...) = 123 (mock)',
    )
    for trace_line in expected_traces:
        self.assertIn(trace_line, stderr)

    self.assertIn('TEST OK', stdout)

class TC_40_FileSystem(RegressionTestCase):
def test_000_proc(self):
stdout, _ = self.run_binary(['proc_common'])
Expand Down
5 changes: 3 additions & 2 deletions libos/test/regression/tests.toml
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,9 @@ manifests = [
"gettimeofday",
"groups",
"helloworld",
"host_root_fs",
"hostname",
"hostname_extra_runtime_conf",
"host_root_fs",
"init_fail",
"itimer",
"keys",
Expand All @@ -75,6 +75,7 @@ manifests = [
"mmap_file",
"mmap_file_backed",
"mmap_file_emulated",
"mock_syscalls",
"mprotect_file_fork",
"mprotect_prot_growsdown",
"multi_pthread",
Expand Down Expand Up @@ -105,8 +106,8 @@ manifests = [
"sealed_file_mod",
"select",
"send_handle",
"shared_object",
"shadow_pseudo_fs",
"shared_object",
"shebang_test_script",
"shm",
"sid",
Expand Down
3 changes: 2 additions & 1 deletion libos/test/regression/tests_musl.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,9 @@ manifests = [
"gettimeofday",
"groups",
"helloworld",
"host_root_fs",
"hostname",
"hostname_extra_runtime_conf",
"host_root_fs",
"init_fail",
"itimer",
"keys",
Expand All @@ -77,6 +77,7 @@ manifests = [
"mmap_file",
"mmap_file_backed",
"mmap_file_emulated",
"mock_syscalls",
"mprotect_file_fork",
"mprotect_prot_growsdown",
"multi_pthread",
Expand Down
Loading

0 comments on commit 4a5ce3e

Please sign in to comment.