-
Notifications
You must be signed in to change notification settings - Fork 277
bwrap: capabilities support #101
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e1c4700
7f4fc55
27dde43
3e075b0
39b64a7
2c03392
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -71,10 +71,14 @@ uid_t opt_sandbox_uid = -1; | |
| gid_t opt_sandbox_gid = -1; | ||
| int opt_sync_fd = -1; | ||
| int opt_block_fd = -1; | ||
| int opt_userns_block_fd = -1; | ||
| int opt_info_fd = -1; | ||
| int opt_seccomp_fd = -1; | ||
| char *opt_sandbox_hostname = NULL; | ||
|
|
||
| #define CAP_TO_MASK_0(x) (1L << ((x) & 31)) | ||
| #define CAP_TO_MASK_1(x) CAP_TO_MASK_0(x - 32) | ||
|
|
||
| typedef enum { | ||
| SETUP_BIND_MOUNT, | ||
| SETUP_RO_BIND_MOUNT, | ||
|
|
@@ -217,10 +221,13 @@ usage (int ecode, FILE *out) | |
| " --symlink SRC DEST Create symlink at DEST with target SRC\n" | ||
| " --seccomp FD Load and use seccomp rules from FD\n" | ||
| " --block-fd FD Block on FD until some data to read is available\n" | ||
| " --userns-block-fd FD Block on FD until the user namespace is ready\n" | ||
| " --info-fd FD Write information about the running container to FD\n" | ||
| " --new-session Create a new terminal session\n" | ||
| " --die-with-parent Kills with SIGKILL child process (COMMAND) when bwrap or bwrap's parent dies.\n" | ||
| " --as-pid-1 Do not install a reaper process with PID=1\n" | ||
| " --cap-add CAP Add cap CAP when running as privileged user\n" | ||
| " --cap-drop CAP Drop cap CAP when running as privileged user\n" | ||
| ); | ||
| exit (ecode); | ||
| } | ||
|
|
@@ -450,8 +457,13 @@ do_init (int event_fd, pid_t initial_pid, struct sock_fprog *seccomp_prog) | |
| return initial_exit_status; | ||
| } | ||
|
|
||
| #define CAP_TO_MASK_0(x) (1L << ((x) & 31)) | ||
| #define CAP_TO_MASK_1(x) CAP_TO_MASK_0(x - 32) | ||
|
|
||
| static uint32_t requested_caps[2] = {0, 0}; | ||
|
|
||
| /* low 32bit caps needed */ | ||
| #define REQUIRED_CAPS_0 (CAP_TO_MASK (CAP_SYS_ADMIN) | CAP_TO_MASK (CAP_SYS_CHROOT) | CAP_TO_MASK (CAP_NET_ADMIN) | CAP_TO_MASK (CAP_SETUID) | CAP_TO_MASK (CAP_SETGID)) | ||
| #define REQUIRED_CAPS_0 (CAP_TO_MASK_0 (CAP_SYS_ADMIN) | CAP_TO_MASK_0 (CAP_SYS_CHROOT) | CAP_TO_MASK_0 (CAP_NET_ADMIN) | CAP_TO_MASK_0 (CAP_SETUID) | CAP_TO_MASK_0 (CAP_SETGID)) | ||
| /* high 32bit caps needed */ | ||
| #define REQUIRED_CAPS_1 0 | ||
|
|
||
|
|
@@ -473,11 +485,21 @@ set_required_caps (void) | |
| } | ||
|
|
||
| static void | ||
| drop_all_caps (void) | ||
| drop_all_caps (bool keep_requested_caps) | ||
| { | ||
| struct __user_cap_header_struct hdr = { _LINUX_CAPABILITY_VERSION_3, 0 }; | ||
| struct __user_cap_data_struct data[2] = { { 0 } }; | ||
|
|
||
| if (keep_requested_caps) | ||
| { | ||
| data[0].effective = requested_caps[0]; | ||
| data[0].permitted = requested_caps[0]; | ||
| data[0].inheritable = requested_caps[0]; | ||
| data[1].effective = requested_caps[1]; | ||
| data[1].permitted = requested_caps[1]; | ||
| data[1].inheritable = requested_caps[1]; | ||
| } | ||
|
|
||
| if (capset (&hdr, data) < 0) | ||
| die_with_error ("capset failed"); | ||
| } | ||
|
|
@@ -494,8 +516,12 @@ has_caps (void) | |
| return data[0].permitted != 0 || data[1].permitted != 0; | ||
| } | ||
|
|
||
| /* Most of the code here is used both to add caps to the ambient capabilities | ||
| * and drop caps from the bounding set. Handle both cases here and add | ||
| * drop_cap_bounding_set/set_ambient_capabilities wrappers to facilitate its usage. | ||
| */ | ||
| static void | ||
| drop_cap_bounding_set (void) | ||
| prctl_caps (uint32_t *caps, bool do_cap_bounding, bool do_set_ambient) | ||
| { | ||
| unsigned long cap; | ||
|
|
||
|
|
@@ -506,14 +532,56 @@ drop_cap_bounding_set (void) | |
| * https://github.com/projectatomic/bubblewrap/pull/175#issuecomment-278051373 | ||
| * https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/security/commoncap.c?id=160da84dbb39443fdade7151bc63a88f8e953077 | ||
| */ | ||
| for (cap = 0; cap <= 63; cap++) | ||
| for (cap = 0; cap <= CAP_LAST_CAP; cap++) | ||
| { | ||
| bool keep = FALSE; | ||
| if (cap < 32) | ||
| { | ||
| if (CAP_TO_MASK_0 (cap) & caps[0]) | ||
| keep = TRUE; | ||
| } | ||
| else | ||
| { | ||
| if (CAP_TO_MASK_1 (cap) & caps[1]) | ||
| keep = TRUE; | ||
| } | ||
|
|
||
| if (keep && do_set_ambient) | ||
| { | ||
| int res = prctl (PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, cap, 0, 0); | ||
| if (res == -1 && !(errno == EINVAL || errno == EPERM)) | ||
| die_with_error ("Adding ambient capability %ld", cap); | ||
| } | ||
|
|
||
| if (!keep && do_cap_bounding) | ||
| { | ||
| int res = prctl (PR_CAPBSET_DROP, cap, 0, 0, 0); | ||
| if (res == -1 && !(errno == EINVAL || errno == EPERM)) | ||
| die_with_error ("Dropping capability %ld from bounds", cap); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| static void | ||
| drop_cap_bounding_set (bool drop_all) | ||
| { | ||
| if (!drop_all) | ||
| prctl_caps (requested_caps, TRUE, FALSE); | ||
| else | ||
| { | ||
| int res = prctl (PR_CAPBSET_DROP, cap, 0, 0, 0); | ||
| if (res == -1 && !(errno == EINVAL || errno == EPERM)) | ||
| die_with_error ("Dropping capability %ld from bounds", cap); | ||
| uint32_t no_caps[2] = {0, 0}; | ||
| prctl_caps (no_caps, TRUE, FALSE); | ||
| } | ||
| } | ||
|
|
||
| static void | ||
| set_ambient_capabilities (void) | ||
| { | ||
| if (is_privileged) | ||
| return; | ||
| prctl_caps (requested_caps, FALSE, TRUE); | ||
| } | ||
|
|
||
| /* This acquires the privileges that the bwrap will need it to work. | ||
| * If bwrap is not setuid, then this does nothing, and it relies on | ||
| * unprivileged user namespaces to be used. This case is | ||
|
|
@@ -562,8 +630,8 @@ acquire_privs (void) | |
| if (new_fsuid != real_uid) | ||
| die ("Unable to set fsuid (was %d)", (int)new_fsuid); | ||
|
|
||
| /* We never need capabilies after execve(), so lets drop everything from the bounding set */ | ||
| drop_cap_bounding_set (); | ||
| /* We never need capabilities after execve(), so lets drop everything from the bounding set */ | ||
| drop_cap_bounding_set (TRUE); | ||
|
|
||
| /* Keep only the required capabilities for setup */ | ||
| set_required_caps (); | ||
|
|
@@ -585,7 +653,7 @@ switch_to_user_with_privs (void) | |
| { | ||
| /* If we're in a new user namespace, we got back the bounding set, clear it again */ | ||
| if (opt_unshare_user) | ||
| drop_cap_bounding_set (); | ||
| drop_cap_bounding_set (FALSE); | ||
|
|
||
| if (!is_privileged) | ||
| return; | ||
|
|
@@ -602,16 +670,13 @@ switch_to_user_with_privs (void) | |
| } | ||
|
|
||
| static void | ||
| drop_privs (void) | ||
| drop_privs (bool keep_requested_caps) | ||
| { | ||
| if (!is_privileged) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I haven't tested it, though I suspect that this will cause https://pagure.io/releng/issue/6550 to regress;
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is also what's causing coreos/rpm-ostree#855; we're now dropping |
||
| return; | ||
|
|
||
| /* Drop root uid */ | ||
| if (setuid (opt_sandbox_uid) < 0) | ||
| if (getuid () == 0 && setuid (opt_sandbox_uid) < 0) | ||
| die_with_error ("unable to drop root uid"); | ||
|
|
||
| drop_all_caps (); | ||
| drop_all_caps (keep_requested_caps); | ||
| } | ||
|
|
||
| static char * | ||
|
|
@@ -1541,6 +1606,23 @@ parse_args_recurse (int *argcp, | |
|
|
||
| opt_block_fd = the_fd; | ||
|
|
||
| argv += 1; | ||
| argc -= 1; | ||
| } | ||
| else if (strcmp (arg, "--userns-block-fd") == 0) | ||
| { | ||
| int the_fd; | ||
| char *endptr; | ||
|
|
||
| if (argc < 2) | ||
| die ("--userns-block-fd takes an argument"); | ||
|
|
||
| the_fd = strtol (argv[1], &endptr, 10); | ||
| if (argv[1][0] == 0 || endptr[0] != 0 || the_fd < 0) | ||
| die ("Invalid fd: %s", argv[1]); | ||
|
|
||
| opt_userns_block_fd = the_fd; | ||
|
|
||
| argv += 1; | ||
| argc -= 1; | ||
| } | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. seems this leaked into the wrong commit |
||
|
|
@@ -1658,6 +1740,54 @@ parse_args_recurse (int *argcp, | |
| { | ||
| opt_as_pid_1 = TRUE; | ||
| } | ||
| else if (strcmp (arg, "--cap-add") == 0) | ||
| { | ||
| cap_value_t cap; | ||
| if (argc < 2) | ||
| die ("--cap-add takes an argument"); | ||
|
|
||
| if (strcasecmp (argv[1], "ALL") == 0) | ||
| { | ||
| requested_caps[0] = requested_caps[1] = 0xFFFFFFFF; | ||
| } | ||
| else | ||
| { | ||
| if (cap_from_name (argv[1], &cap) < 0) | ||
| die ("unknown cap: %s", argv[1]); | ||
|
|
||
| if (cap < 32) | ||
| requested_caps[0] |= CAP_TO_MASK_0 (cap); | ||
| else | ||
| requested_caps[1] |= CAP_TO_MASK_1 (cap - 32); | ||
| } | ||
|
|
||
| argv += 1; | ||
| argc -= 1; | ||
| } | ||
| else if (strcmp (arg, "--cap-drop") == 0) | ||
| { | ||
| cap_value_t cap; | ||
| if (argc < 2) | ||
| die ("--cap-drop takes an argument"); | ||
|
|
||
| if (strcasecmp (argv[1], "ALL") == 0) | ||
| { | ||
| requested_caps[0] = requested_caps[1] = 0; | ||
| } | ||
| else | ||
| { | ||
| if (cap_from_name (argv[1], &cap) < 0) | ||
| die ("unknown cap: %s", argv[1]); | ||
|
|
||
| if (cap < 32) | ||
| requested_caps[0] &= ~CAP_TO_MASK_0 (cap); | ||
| else | ||
| requested_caps[1] &= ~CAP_TO_MASK_1 (cap - 32); | ||
| } | ||
|
|
||
| argv += 1; | ||
| argc -= 1; | ||
| } | ||
| else if (*arg == '-') | ||
| { | ||
| die ("Unknown option %s", arg); | ||
|
|
@@ -1764,6 +1894,15 @@ main (int argc, | |
|
|
||
| parse_args (&argc, &argv); | ||
|
|
||
| if ((requested_caps[0] || requested_caps[1]) && is_privileged) | ||
| die ("--cap-add in setuid mode can be used only by root"); | ||
|
|
||
| if (opt_userns_block_fd != -1 && !opt_unshare_user) | ||
| die ("--userns-block-fd requires --unshare-user"); | ||
|
|
||
| if (opt_userns_block_fd != -1 && opt_info_fd == -1) | ||
| die ("--userns-block-fd requires --info-fd"); | ||
|
|
||
| /* We have to do this if we weren't installed setuid (and we're not | ||
| * root), so let's just DWIM */ | ||
| if (!is_privileged && getuid () != 0) | ||
|
|
@@ -1902,7 +2041,7 @@ main (int argc, | |
| { | ||
| /* Parent, outside sandbox, privileged (initially) */ | ||
|
|
||
| if (is_privileged && opt_unshare_user) | ||
| if (is_privileged && opt_unshare_user && opt_userns_block_fd == -1) | ||
| { | ||
| /* We're running as euid 0, but the uid we want to map is | ||
| * not 0. This means we're not allowed to write this from | ||
|
|
@@ -1920,17 +2059,11 @@ main (int argc, | |
| /* Initial launched process, wait for exec:ed command to exit */ | ||
|
|
||
| /* We don't need any privileges in the launcher, drop them immediately. */ | ||
| drop_privs (); | ||
| drop_privs (FALSE); | ||
|
|
||
| /* Optionally bind our lifecycle to that of the parent */ | ||
| handle_die_with_parent (); | ||
|
|
||
| /* Let child run now that the uid maps are set up */ | ||
| val = 1; | ||
| res = write (child_wait_fd, &val, 8); | ||
| /* Ignore res, if e.g. the child died and closed child_wait_fd we don't want to error out here */ | ||
| close (child_wait_fd); | ||
|
|
||
| if (opt_info_fd != -1) | ||
| { | ||
| cleanup_free char *output = xasprintf ("{\n \"child-pid\": %i\n}\n", pid); | ||
|
|
@@ -1940,6 +2073,19 @@ main (int argc, | |
| close (opt_info_fd); | ||
| } | ||
|
|
||
| if (opt_userns_block_fd != -1) | ||
| { | ||
| char b[1]; | ||
| (void) TEMP_FAILURE_RETRY (read (opt_userns_block_fd, b, 1)); | ||
| close (opt_userns_block_fd); | ||
| } | ||
|
|
||
| /* Let child run now that the uid maps are set up */ | ||
| val = 1; | ||
| res = write (child_wait_fd, &val, 8); | ||
| /* Ignore res, if e.g. the child died and closed child_wait_fd we don't want to error out here */ | ||
| close (child_wait_fd); | ||
|
|
||
| monitor_child (event_fd, pid); | ||
| exit (0); /* Should not be reached, but better safe... */ | ||
| } | ||
|
|
@@ -1976,7 +2122,7 @@ main (int argc, | |
|
|
||
| ns_uid = opt_sandbox_uid; | ||
| ns_gid = opt_sandbox_gid; | ||
| if (!is_privileged && opt_unshare_user) | ||
| if (!is_privileged && opt_unshare_user && opt_userns_block_fd == -1) | ||
| { | ||
| /* In the unprivileged case we have to write the uid/gid maps in | ||
| * the child, because we have no caps in the parent */ | ||
|
|
@@ -2050,7 +2196,7 @@ main (int argc, | |
| if (child == 0) | ||
| { | ||
| /* Unprivileged setup process */ | ||
| drop_privs (); | ||
| drop_privs (FALSE); | ||
| close (privsep_sockets[0]); | ||
| setup_newroot (opt_unshare_pid, privsep_sockets[1]); | ||
| exit (0); | ||
|
|
@@ -2093,7 +2239,8 @@ main (int argc, | |
| die_with_error ("unmount old root"); | ||
|
|
||
| if (opt_unshare_user && | ||
| (ns_uid != opt_sandbox_uid || ns_gid != opt_sandbox_gid)) | ||
| (ns_uid != opt_sandbox_uid || ns_gid != opt_sandbox_gid) && | ||
| opt_userns_block_fd == -1) | ||
| { | ||
| /* Now that devpts is mounted and we've no need for mount | ||
| permissions we can create a new userspace and map our uid | ||
|
|
@@ -2115,8 +2262,8 @@ main (int argc, | |
| if (chdir ("/") != 0) | ||
| die_with_error ("chdir /"); | ||
|
|
||
| /* All privileged ops are done now, so drop it */ | ||
| drop_privs (); | ||
| /* All privileged ops are done now, so drop caps we don't need */ | ||
| drop_privs (!is_privileged); | ||
|
|
||
| if (opt_block_fd != -1) | ||
| { | ||
|
|
@@ -2188,6 +2335,8 @@ main (int argc, | |
|
|
||
| if (pid != 0) | ||
| { | ||
| drop_all_caps (FALSE); | ||
|
|
||
| /* Close fds in pid 1, except stdio and optionally event_fd | ||
| (for syncing pid 2 lifetime with monitor_child) and | ||
| opt_sync_fd (for syncing sandbox lifetime with outside | ||
|
|
@@ -2227,6 +2376,9 @@ main (int argc, | |
| /* Optionally bind our lifecycle */ | ||
| handle_die_with_parent (); | ||
|
|
||
| if (!is_privileged) | ||
| set_ambient_capabilities (); | ||
|
|
||
| /* Should be the last thing before execve() so that filters don't | ||
| * need to handle anything above */ | ||
| if (seccomp_data != NULL && | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
NIt: this looks like a dupe.