From 99160b80e80a711996accfe9fda8686badab1d89 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 3 Aug 2016 18:14:00 +0300 Subject: [PATCH 001/277] crns.py: New attempt to have --unshare option So, here's the enhanced version of the first try. Changes are: 1. The wrapper name is criu-ns instead of crns.py 2. The CLI is absolutely the same as for criu, since the script re-execl-s criu binary. E.g. scripts/criu-ns dump -t 1234 ... just works 3. Caller doesn't need to care about substituting CLI options, instead, the scripts analyzes the command line and a) replaces -t|--tree argument with virtual pid __if__ the target task lives in another pidns b) keeps the current cwd (and root) __if__ switches to another mntns. A limitation applies here -- cwd path should be the same in target ns, no "smart path mapping" is performed. So this script is for now only useful for mntns clones (which is our main goal at the moment). Signed-off-by: Pavel Emelyanov Looks-good-to: Andrey Vagin --- scripts/criu-ns | 240 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 240 insertions(+) create mode 100755 scripts/criu-ns diff --git a/scripts/criu-ns b/scripts/criu-ns new file mode 100755 index 0000000000..e7ebbf0ca2 --- /dev/null +++ b/scripts/criu-ns @@ -0,0 +1,240 @@ +#!/usr/bin/env python +import ctypes +import ctypes.util +import errno +import sys +import os + +# constants for unshare +CLONE_NEWNS = 0x00020000 +CLONE_NEWPID = 0x20000000 + +# - constants for mount +MS_REC = 16384 +MS_PRIVATE = 1 << 18 +MS_SLAVE = 1 << 19 + +# Load libc bindings +_libc = ctypes.CDLL(ctypes.util.find_library("c"), use_errno=True) + +try: + _unshare = _libc.unshare +except AttributeError: + raise OSError(errno.EINVAL, "unshare is not supported on this platform") +else: + _unshare.argtypes = [ ctypes.c_int ] + _unshare.restype = ctypes.c_int + +try: + _setns = _libc.setns +except AttributeError: + raise OSError(errno.EINVAL, "setns is not supported on this platform") +else: + 
_setns.argtypes = [ ctypes.c_int, ctypes.c_int ] + _setns.restype = ctypes.c_int + +try: + _mount = _libc.mount +except AttributeError: + raise OSError(errno.EINVAL, "mount is not supported on this platform") +else: + _mount.argtypes = [ + ctypes.c_char_p, + ctypes.c_char_p, + ctypes.c_char_p, + ctypes.c_ulong, + ctypes.c_void_p + ] + _mount.restype = ctypes.c_int + +try: + _umount = _libc.umount +except AttributeError: + raise OSError(errno.EINVAL, "umount is not supported on this platform") +else: + _umount.argtypes = [ctypes.c_char] + _umount.restype = ctypes.c_int + + +def run_criu(): + print sys.argv + os.execlp('criu', *['criu'] + sys.argv[1:]) + + +def wrap_restore(): + # Unshare pid and mount namespaces + if _unshare(CLONE_NEWNS | CLONE_NEWPID) != 0: + _errno = ctypes.get_errno() + raise OSError(_errno, errno.errorcode[_errno]) + + (r_pipe, w_pipe) = os.pipe() + + # Spawn the init + if os.fork() == 0: + os.close(r_pipe) + + # Mount new /proc + if _mount(None, "/", None, MS_SLAVE|MS_REC, None) != 0: + _errno = ctypes.get_errno() + raise OSError(_errno, errno.errorcode[_errno]) + + if _mount('proc', '/proc', 'proc', 0, None) != 0: + _errno = ctypes.get_errno() + raise OSError(_errno, errno.errorcode[_errno]) + + # Spawn CRIU binary + criu_pid = os.fork() + if criu_pid == 0: + run_criu() + raise OSError(errno.ENOENT, "No such command") + + while True: + try: + (pid, status) = os.wait() + if pid == criu_pid: + status = os.WEXITSTATUS(status) + break + except OSError: + status = -251 + break + + os.write(w_pipe, "%d" % status) + os.close(w_pipe) + + if status != 0: + sys.exit(status) + + while True: + try: + os.wait() + except OSError: + break + + sys.exit(0) + + # Wait for CRIU to exit and report the status back + os.close(w_pipe) + status = os.read(r_pipe, 1024) + if not status.isdigit(): + status_i = -252 + else: + status_i = int(status) + + return status_i + + +def get_varg(args): + for i in xrange(1, len(sys.argv)): + if not sys.argv[i] in args: + continue 
+ + if i + 1 >= len(sys.argv): + break + + return (sys.argv[i + 1], i + 1) + + return (None, None) + + + +def set_pidns(tpid, pid_idx): + # Joind pid namespace. Note, that the given pid should + # be changed in -t option, as task lives in different + # pid namespace. + + myns = os.stat('/proc/self/ns/pid').st_ino + + ns_fd = os.open('/proc/%s/ns/pid' % tpid, os.O_RDONLY) + if myns != os.fstat(ns_fd).st_ino: + + for l in open('/proc/%s/status' % tpid): + if not l.startswith('NSpid:'): + continue + + ls = l.split() + if ls[1] != tpid: + raise OSError(errno.ESRCH, 'No such pid') + + print 'Replace pid %s with %s' % (tpid, ls[2]) + sys.argv[pid_idx] = ls[2] + break + else: + raise OSError(errno.ENOENT, 'Cannot find NSpid field in proc') + + if _setns(ns_fd, 0) != 0: + _errno = ctypes.get_errno() + raise OSError(_errno, errno.errorcode[_errno]) + + os.close(ns_fd) + + +def set_mntns(tpid): + # Join mount namespace. Trick here too -- check / and . + # will be the same in target mntns. + + myns = os.stat('/proc/self/ns/mnt').st_ino + ns_fd = os.open('/proc/%s/ns/mnt' % tpid, os.O_RDONLY) + if myns != os.fstat(ns_fd).st_ino: + root_st = os.stat('/') + cwd_st = os.stat('.') + cwd_path = os.path.realpath('.') + + if _setns(ns_fd, 0) != 0: + _errno = ctypes.get_errno() + raise OSError(_errno, errno.errorcode[_errno]) + + os.chdir(cwd_path) + root_nst = os.stat('/') + cwd_nst = os.stat('.') + + def steq(st, nst): + return (st.st_dev, st.st_ino) == (nst.st_dev, nst.st_ino) + + if not steq(root_st, root_nst): + raise OSError(errno.EXDEV, 'Target ns / is not as current') + if not steq(cwd_st, cwd_nst): + raise OSError(errno.EXDEV, 'Target ns . 
is not as current') + + + os.close(ns_fd) + + +def wrap_dump(): + (pid, pid_idx) = get_varg(('-t', '--tree')) + if pid is None: + raise OSError(errno.EINVAL, 'No --tree option given') + + set_pidns(pid, pid_idx) + set_mntns(pid) + + # Spawn CRIU binary + criu_pid = os.fork() + if criu_pid == 0: + run_criu() + raise OSError(errno.ENOENT, "No such command") + + # Wait for CRIU to exit and report the status back + while True: + try: + (pid, status) = os.wait() + if pid == criu_pid: + status = os.WEXITSTATUS(status) + break + except OSError: + status = -251 + break + + return status + + +action = sys.argv[1] + +if action == 'restore': + res = wrap_restore() +elif action == 'dump' or action == 'pre-dump': + res = wrap_dump() +else: + print 'Unsupported action %s for nswrap' % action + res = -1 + +sys.exit(res) From 499f3bec1c16368f7c6a203e672dd75290cbbbad Mon Sep 17 00:00:00 2001 From: "rbruno@gsd.inesc-id.pt" Date: Sat, 11 Feb 2017 04:34:43 +0100 Subject: [PATCH 002/277] util: Copy file w/o sendfile This is the case when the in/out files are image cache/proxy sockets. Signed-off-by: Rodrigo Bruno Signed-off-by: Katerina Koukiou Signed-off-by: Pavel Emelyanov --- criu/util.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/criu/util.c b/criu/util.c index 028f604bb8..f8fd0332b6 100644 --- a/criu/util.c +++ b/criu/util.c @@ -421,29 +421,46 @@ int copy_file(int fd_in, int fd_out, size_t bytes) { ssize_t written = 0; size_t chunk = bytes ? 
bytes : 4096; + char *buffer = (char*) malloc(chunk); + ssize_t ret; while (1) { - ssize_t ret; + if (false) { + ret = read(fd_in, buffer, chunk); + if (ret < 0) { + pr_perror("Can't read from fd_in\n"); + ret = -1; + goto err; + } + if (write(fd_out, buffer, ret) != ret) { + pr_perror("Couldn't write all read bytes\n"); + ret = -1; + goto err; + } + } else + ret = sendfile(fd_out, fd_in, NULL, chunk); - ret = sendfile(fd_out, fd_in, NULL, chunk); if (ret < 0) { pr_perror("Can't send data to ghost file"); - return -1; + ret = -1; + goto err; } if (ret == 0) { if (bytes && (written != bytes)) { pr_err("Ghost file size mismatch %zu/%zu\n", written, bytes); - return -1; + ret = -1; + goto err; } break; } written += ret; } - - return 0; +err: + free(buffer); + return ret; } int read_fd_link(int lfd, char *buf, size_t size) From 3a48a097b226922becd7f9a2a9412d35057c030a Mon Sep 17 00:00:00 2001 From: "rbruno@gsd.inesc-id.pt" Date: Sat, 11 Feb 2017 04:34:43 +0100 Subject: [PATCH 003/277] Process Migration using Sockets (p1) This patch introduces the --remote option and the necessary code changes to support it. This leaves user the option to decide if the checkpoint data is to be stored on disk or sent through the network (through the image-proxy). The latter forwards the data to the destination node where image-cache receives it. The overall communication is performed as follows: src_node CRIU dump -> (sends images through UNIX sockets) -> image-proxy | V dst_node: CRIU restore <- (receives images through UNIX sockets)<- image-cache Communication between image-proxy and image-cache is done through a single TCP connection. 
Running criu with the --remote option looks like this: dst_node# criu image-cache -d --port PORT -o /tmp/image-cache.log dst_node# criu restore --remote -o /tmp/image-cache.log src_node# criu image-proxy -d --port PORT --address DST_ADDR -o /tmp/image-proxy.log src_node# criu dump -t PID --remote -o /tmp/dump.log [ xemul: here's the list of what should be done with the cache/proxy in order to have them merged into master. 0. Document the whole thing :) Please, add articles for the newly introduced actions and options to the https://criu.org/CLI page. Also, it would be good to have an article describing the protocols involved. 1. Make the unix sockets reside in work-dir. The good thing is that we've got rid of the socket name option :) But looking at do_open_remote_image() I see that it fchdir-s to the image dir before connecting to proxy/cache. A better solution is to put the socket into workdir. 1a. After this the option -D|--images-dir should become optional. Provided --remote is given, CRIU should work purely on the work-dir and not generate anything in the images-dir. 2. Tune up the image_cache and image_proxy commands to accept the --status-fd and --pidfile options. Presumably the very cr_daemon() call should be equipped with everything that should be done for daemonizing, and proxy/cache tasks should just call it :) 3. Fix local connections not to generate per-image threads. There can be many images and it's not nice to stress the system with such a number of threads. Please, look at how criu/uffd.c manages multiple descriptors with page-faults using the epoll stuff. 3a. The accept_remote_image_connections() seems not to work well with the opts.ps_socket scenario, as the former just calls accept() on whatever socket is passed there, while the opts.ps_socket is already an established socket for data transfer. 4. No strings in protocol. Now the hard-coded "RESTORE_FINISH" string (and the "DUMP_FINISH" one) is used to terminate the communication. 
Need to tune up the protobuf objects to send boolean (or integer) EOF sign rather that the string. 5. Check how proxy/cache works with incremental dumps. Looking at the skip_remote_bytes() I think that image-cache and -proxy still do not work well with stacked pages images. Probably for those we'll need the page-server or lazy-pages -like protocol that would request the needed regions and receive it back rather than read bytes from sockets simply to skip those. 6. Add support for cache/proxy into go-phaul code. I haven't yet finished with the prototype, but plan to do it soon, so once the above steps are done we'll be able to proceed with this one. ] Signed-off-by: Rodrigo Bruno Signed-off-by: Katerina Koukiou Signed-off-by: Pavel Emelyanov --- criu/Makefile.crtools | 4 + criu/config.c | 1 + criu/cr-dump.c | 16 ++ criu/cr-restore.c | 6 + criu/crtools.c | 13 ++ criu/image.c | 82 ++++++++--- criu/img-remote.c | 275 +++++++++++++++++++++++++++++++++++ criu/include/cr_options.h | 1 + criu/include/img-remote.h | 83 +++++++++++ criu/include/protobuf-desc.h | 4 + criu/page-xfer.c | 46 ++++-- criu/pagemap.c | 51 +++++-- criu/protobuf-desc.c | 1 + criu/util.c | 2 +- images/Makefile | 1 + images/remote-image.proto | 20 +++ 16 files changed, 568 insertions(+), 38 deletions(-) create mode 100644 criu/img-remote.c create mode 100644 criu/include/img-remote.h create mode 100644 images/remote-image.proto diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools index 4588ea5b8a..1756cac6ce 100644 --- a/criu/Makefile.crtools +++ b/criu/Makefile.crtools @@ -29,6 +29,10 @@ obj-y += files-reg.o obj-y += fsnotify.o obj-y += image-desc.o obj-y += image.o +obj-y += img-remote.o +obj-y += img-proxy.o +obj-y += img-cache.o +obj-y += img-remote-proto.o obj-y += ipc_ns.o obj-y += irmap.o obj-y += kcmp-ids.o diff --git a/criu/config.c b/criu/config.c index 39aa071c99..3a54afd4b3 100644 --- a/criu/config.c +++ b/criu/config.c @@ -508,6 +508,7 @@ int parse_options(int argc, char **argv, bool 
*usage_error, BOOL_OPT(SK_CLOSE_PARAM, &opts.tcp_close), { "verbosity", optional_argument, 0, 'v' }, { "ps-socket", required_argument, 0, 1091}, + BOOL_OPT("remote", &opts.remote), { "config", required_argument, 0, 1089}, { "no-default-config", no_argument, 0, 1090}, { "tls-cacert", required_argument, 0, 1092}, diff --git a/criu/cr-dump.c b/criu/cr-dump.c index 9273fc0a51..e070b8b254 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -80,6 +80,7 @@ #include "fault-injection.h" #include "dump.h" #include "eventpoll.h" +#include "img-remote.h" /* * Architectures can overwrite this function to restore register sets that @@ -1563,6 +1564,11 @@ int cr_pre_dump_tasks(pid_t pid) */ rlimit_unlimit_nofile(); + if (opts.remote && push_snapshot_id() < 0) { + pr_err("Failed to push image namespace.\n"); + goto err; + } + root_item = alloc_pstree_item(); if (!root_item) goto err; @@ -1739,6 +1745,11 @@ static int cr_dump_finish(int ret) close_service_fd(CR_PROC_FD_OFF); + if (opts.remote && (finish_remote_dump() < 0)) { + pr_err("Finish remote dump failed.\n"); + return post_dump_ret ? 
: 1; + } + if (ret) { pr_err("Dumping FAILED.\n"); } else { @@ -1767,6 +1778,11 @@ int cr_dump_tasks(pid_t pid) */ rlimit_unlimit_nofile(); + if (opts.remote && push_snapshot_id() < 0) { + pr_err("Failed to push image namespace.\n"); + goto err; + } + root_item = alloc_pstree_item(); if (!root_item) goto err; diff --git a/criu/cr-restore.c b/criu/cr-restore.c index b4530f8e5d..de0b2cb407 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -27,6 +27,7 @@ #include "cr_options.h" #include "servicefd.h" #include "image.h" +#include "img-remote.h" #include "util.h" #include "util-pie.h" #include "criu-log.h" @@ -2423,6 +2424,11 @@ int cr_restore_tasks(void) goto err; ret = restore_root_task(root_item); + + if (opts.remote && (finish_remote_restore() < 0)) { + pr_err("Finish remote restore failed.\n"); + goto err; + } err: cr_plugin_fini(CR_PLUGIN_STAGE__RESTORE, ret); return ret; diff --git a/criu/crtools.c b/criu/crtools.c index a94875684e..b8e074f81a 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -46,6 +46,7 @@ #include "setproctitle.h" #include "sysctl.h" +#include "img-remote.h" int main(int argc, char *argv[], char *envp[]) { @@ -228,6 +229,12 @@ int main(int argc, char *argv[], char *envp[]) if (!strcmp(argv[optind], "page-server")) return cr_page_server(opts.daemon_mode, false, -1) != 0; + if (!strcmp(argv[optind], "image-cache")) + return image_cache(opts.daemon_mode, DEFAULT_CACHE_SOCKET, opts.port); + + if (!strcmp(argv[optind], "image-proxy")) + return image_proxy(opts.daemon_mode, DEFAULT_PROXY_SOCKET, opts.addr, opts.port); + if (!strcmp(argv[optind], "service")) return cr_service(opts.daemon_mode); @@ -267,6 +274,8 @@ int main(int argc, char *argv[], char *envp[]) " criu service []\n" " criu dedup\n" " criu lazy-pages -D DIR []\n" +" criu image-cache []\n" +" criu image-proxy []\n" "\n" "Commands:\n" " dump checkpoint a process/tree identified by pid\n" @@ -278,6 +287,8 @@ int main(int argc, char *argv[], char *envp[]) " dedup remove duplicates 
in memory dump\n" " cpuinfo dump writes cpu information into image file\n" " cpuinfo check validates cpu information read from image file\n" +" image-proxy launch dump-side proxy to sent images\n" +" image-cache launch restore-side cache to reveive images\n" ); if (usage_error) { @@ -330,6 +341,8 @@ int main(int argc, char *argv[], char *envp[]) " macvlan[IFNAME]:OUTNAME\n" " mnt[COOKIE]:ROOT\n" "\n" +" --remote dump/restore images directly to/from remote node using\n" +" image-proxy/image-cache\n" "* Special resources support:\n" " --" SK_EST_PARAM " checkpoint/restore established TCP connections\n" " --" SK_INFLIGHT_PARAM " skip (ignore) in-flight TCP connections\n" diff --git a/criu/image.c b/criu/image.c index 2eb9269296..6a10d60e8d 100644 --- a/criu/image.c +++ b/criu/image.c @@ -17,6 +17,7 @@ #include "images/inventory.pb-c.h" #include "images/pagemap.pb-c.h" #include "proc_parse.h" +#include "img-remote.h" #include "namespaces.h" bool ns_per_id = false; @@ -390,6 +391,43 @@ static int img_write_magic(struct cr_img *img, int oflags, int type) return write_img(img, &imgset_template[type].magic); } +int do_open_remote_image(int dfd, char *path, int flags) +{ + char *snapshot_id = NULL; + int ret; + + /* When using namespaces, the current dir is changed so we need to + * change to previous working dir and back to correctly open the image + * proxy and cache sockets. 
*/ + int save = dirfd(opendir(".")); + if (fchdir(get_service_fd(IMG_FD_OFF)) < 0) { + pr_debug("fchdir to dfd failed!\n"); + return -1; + } + + snapshot_id = get_snapshot_id_from_idx(dfd); + + if (snapshot_id == NULL) + ret = -1; + else if (flags == O_RDONLY) { + pr_debug("do_open_remote_image RDONLY path=%s snapshot_id=%s\n", + path, snapshot_id); + ret = read_remote_image_connection(snapshot_id, path); + } else { + pr_debug("do_open_remote_image WDONLY path=%s snapshot_id=%s\n", + path, snapshot_id); + ret = write_remote_image_connection(snapshot_id, path, O_WRONLY); + } + + if (fchdir(save) < 0) { + pr_debug("fchdir to save failed!\n"); + return -1; + } + close(save); + + return ret; +} + struct openat_args { char path[PATH_MAX]; int flags; @@ -415,24 +453,28 @@ static int do_open_image(struct cr_img *img, int dfd, int type, unsigned long of flags = oflags & ~(O_NOBUF | O_SERVICE | O_FORCE_LOCAL); - /* - * For pages images dedup we need to open images read-write on - * restore, that may require proper capabilities, so we ask - * usernsd to do it for us - */ - if (root_ns_mask & CLONE_NEWUSER && - type == CR_FD_PAGES && oflags & O_RDWR) { - struct openat_args pa = { - .flags = flags, - .err = 0, - .mode = CR_FD_PERM, - }; - snprintf(pa.path, PATH_MAX, "%s", path); - ret = userns_call(userns_openat, UNS_FDOUT, &pa, sizeof(struct openat_args), dfd); - if (ret < 0) - errno = pa.err; - } else - ret = openat(dfd, path, flags, CR_FD_PERM); + if (opts.remote && !(oflags & O_FORCE_LOCAL)) + ret = do_open_remote_image(dfd, path, flags); + else { + /* + * For pages images dedup we need to open images read-write on + * restore, that may require proper capabilities, so we ask + * usernsd to do it for us + */ + if (root_ns_mask & CLONE_NEWUSER && + type == CR_FD_PAGES && oflags & O_RDWR) { + struct openat_args pa = { + .flags = flags, + .err = 0, + .mode = CR_FD_PERM, + }; + snprintf(pa.path, PATH_MAX, "%s", path); + ret = userns_call(userns_openat, UNS_FDOUT, &pa, 
sizeof(struct openat_args), dfd); + if (ret < 0) + errno = pa.err; + } else + ret = openat(dfd, path, flags, CR_FD_PERM); + } if (ret < 0) { if (!(flags & O_CREAT) && (errno == ENOENT || ret == -ENOENT)) { pr_info("No %s image\n", path); @@ -535,7 +577,9 @@ int open_image_dir(char *dir) return -1; fd = ret; - if (opts.img_parent) { + if (opts.remote) { + init_snapshot_id(dir); + } else if (opts.img_parent) { ret = symlinkat(opts.img_parent, fd, CR_PARENT_LINK); if (ret < 0 && errno != EEXIST) { pr_perror("Can't link parent snapshot"); diff --git a/criu/img-remote.c b/criu/img-remote.c new file mode 100644 index 0000000000..337cb4a7f7 --- /dev/null +++ b/criu/img-remote.c @@ -0,0 +1,275 @@ +#include +#include +#include +#include +#include +#include +#include +#include "xmalloc.h" +#include "criu-log.h" +#include "img-remote.h" +#include "img-remote-proto.h" +#include "images/remote-image.pb-c.h" +#include "protobuf-desc.h" +#include +#include "servicefd.h" +#include "common/compiler.h" +#include "cr_options.h" + +#define PB_LOCAL_IMAGE_SIZE PATHLEN + +static char *snapshot_id; +bool restoring = true; + +LIST_HEAD(snapshot_head); + +/* A snapshot is a dump or pre-dump operation. Each snapshot is identified by an + * ID which corresponds to the working directory specefied by the user. + */ +struct snapshot { + char snapshot_id[PATHLEN]; + struct list_head l; +}; + +struct snapshot *new_snapshot(char *snapshot_id) +{ + struct snapshot *s = malloc(sizeof(struct snapshot)); + + if (!s) { + pr_perror("Failed to allocate snapshot structure"); + return NULL; + } + strncpy(s->snapshot_id, snapshot_id, PATHLEN); + return s; +} + +void add_snapshot(struct snapshot *snapshot) +{ + list_add_tail(&(snapshot->l), &snapshot_head); +} + +int read_remote_image_connection(char *snapshot_id, char *path) +{ + int error; + int sockfd = setup_UNIX_client_socket(restoring ? 
DEFAULT_CACHE_SOCKET: DEFAULT_PROXY_SOCKET); + + if (sockfd < 0) { + pr_perror("Error opening local connection for %s:%s", path, snapshot_id); + return -1; + } + + if (write_header(sockfd, snapshot_id, path, O_RDONLY) < 0) { + pr_perror("Error writing header for %s:%s", path, snapshot_id); + return -1; + } + + if (read_reply_header(sockfd, &error) < 0) { + pr_perror("Error reading reply header for %s:%s", path, snapshot_id); + return -1; + } + if (!error || !strncmp(path, RESTORE_FINISH, sizeof(RESTORE_FINISH))) + return sockfd; + else if (error == ENOENT) { + pr_info("Image does not exist (%s:%s)\n", path, snapshot_id); + close(sockfd); + return -ENOENT; + } + pr_perror("Unexpected error returned: %d (%s:%s)\n", error, path, snapshot_id); + close(sockfd); + return -1; +} + +int write_remote_image_connection(char *snapshot_id, char *path, int flags) +{ + int sockfd = setup_UNIX_client_socket(DEFAULT_PROXY_SOCKET); + + if (sockfd < 0) + return -1; + + if (write_header(sockfd, snapshot_id, path, flags) < 0) { + pr_perror("Error writing header for %s:%s", path, snapshot_id); + return -1; + } + return sockfd; +} + +int finish_remote_dump(void) +{ + pr_info("Dump side is calling finish\n"); + int fd = write_remote_image_connection(NULL_SNAPSHOT_ID, DUMP_FINISH, O_WRONLY); + + if (fd == -1) { + pr_perror("Unable to open finish dump connection"); + return -1; + } + + close(fd); + return 0; +} + +int finish_remote_restore(void) +{ + pr_info("Restore side is calling finish\n"); + int fd = read_remote_image_connection(NULL_SNAPSHOT_ID, RESTORE_FINISH); + + if (fd == -1) { + pr_perror("Unable to open finish restore connection"); + return -1; + } + + close(fd); + return 0; +} + +int skip_remote_bytes(int fd, unsigned long len) +{ + static char buf[4096]; + int n = 0; + unsigned long curr = 0; + + for (; curr < len; ) { + n = read(fd, buf, min(len - curr, (unsigned long)4096)); + if (n == 0) { + pr_perror("Unexpected end of stream (skipping %lx/%lx bytes)", + curr, len); + 
return -1; + } else if (n > 0) { + curr += n; + } else { + pr_perror("Error while skipping bytes from stream (%lx/%lx)", + curr, len); + return -1; + } + } + + if (curr != len) { + pr_perror("Unable to skip the current number of bytes: %lx instead of %lx", + curr, len); + return -1; + } + return 0; +} + +static int pull_snapshot_ids(void) +{ + int n, sockfd; + SnapshotIdEntry *ls; + struct snapshot *s = NULL; + + sockfd = read_remote_image_connection(NULL_SNAPSHOT_ID, PARENT_IMG); + + /* The connection was successful but there is not file. */ + if (sockfd < 0 && errno == ENOENT) + return 0; + else if (sockfd < 0) { + pr_perror("Unable to open snapshot id read connection"); + return -1; + } + + while (1) { + n = pb_read_obj(sockfd, (void **)&ls, PB_SNAPSHOT_ID); + if (!n) { + close(sockfd); + return n; + } else if (n < 0) { + pr_perror("Unable to read remote snapshot ids"); + close(sockfd); + return n; + } + + s = new_snapshot(ls->snapshot_id); + if (!s) { + pr_perror("Unable create new snapshot structure"); + close(sockfd); + return -1; + } + add_snapshot(s); + pr_info("[read_snapshot ids] parent = %s\n", ls->snapshot_id); + } + free(ls); + close(sockfd); + return n; +} + +int push_snapshot_id(void) +{ + int n; + restoring = false; + SnapshotIdEntry rn = SNAPSHOT_ID_ENTRY__INIT; + int sockfd = write_remote_image_connection(NULL_SNAPSHOT_ID, PARENT_IMG, O_APPEND); + + if (sockfd < 0) { + pr_perror("Unable to open snapshot id push connection"); + return -1; + } + + rn.snapshot_id = xmalloc(sizeof(char) * PATHLEN); + if (!rn.snapshot_id) { + pr_perror("Unable to allocate snapshot id buffer"); + close(sockfd); + return -1; + } + strncpy(rn.snapshot_id, snapshot_id, PATHLEN); + + n = pb_write_obj(sockfd, &rn, PB_SNAPSHOT_ID); + + xfree(rn.snapshot_id); + close(sockfd); + return n; +} + +void init_snapshot_id(char *si) +{ + snapshot_id = si; +} + +char *get_curr_snapshot_id(void) +{ + return snapshot_id; +} + +int get_curr_snapshot_id_idx(void) +{ + struct snapshot *si; 
+ int idx = 0; + + if (list_empty(&snapshot_head)) + pull_snapshot_ids(); + + list_for_each_entry(si, &snapshot_head, l) { + if (!strncmp(si->snapshot_id, snapshot_id, PATHLEN)) + return idx; + idx++; + } + + pr_perror("Error, could not find current snapshot id (%s) fd", snapshot_id); + return -1; +} + +char *get_snapshot_id_from_idx(int idx) +{ + struct snapshot *si; + + if (list_empty(&snapshot_head)) + pull_snapshot_ids(); + + /* Note: if idx is the service fd then we need the current + * snapshot_id idx. Else we need a parent snapshot_id idx. + */ + if (idx == get_service_fd(IMG_FD_OFF)) + idx = get_curr_snapshot_id_idx(); + + list_for_each_entry(si, &snapshot_head, l) { + if (!idx) + return si->snapshot_id; + idx--; + } + + pr_perror("Error, could not find snapshot id for idx %d", idx); + return NULL; +} + +int get_curr_parent_snapshot_id_idx(void) +{ + return get_curr_snapshot_id_idx() - 1; +} diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h index 82f76ad948..c519c740df 100644 --- a/criu/include/cr_options.h +++ b/criu/include/cr_options.h @@ -135,6 +135,7 @@ struct cr_options { int weak_sysctls; int status_fd; bool orphan_pts_master; + int remote; pid_t tree_id; int log_level; char *imgs_dir; diff --git a/criu/include/img-remote.h b/criu/include/img-remote.h new file mode 100644 index 0000000000..38acbd26de --- /dev/null +++ b/criu/include/img-remote.h @@ -0,0 +1,83 @@ +#include +#include + +#ifndef IMAGE_REMOTE_H +#define IMAGE_REMOTE_H + +#define PATHLEN PATH_MAX +#define DUMP_FINISH "DUMP_FINISH" +#define RESTORE_FINISH "RESTORE_FINISH" +#define PARENT_IMG "parent" +#define NULL_SNAPSHOT_ID "null" +#define DEFAULT_CACHE_SOCKET "img-cache.sock" +#define DEFAULT_PROXY_SOCKET "img-proxy.sock" +#define DEFAULT_CACHE_PORT 9996 +#define DEFAULT_CACHE_HOST "localhost" + +/* Called by restore to get the fd correspondent to a particular path. This call + * will block until the connection is received. 
+ */ +int read_remote_image_connection(char *snapshot_id, char *path); + +/* Called by dump to create a socket connection to the restore side. The socket + * fd is returned for further writing operations. + */ +int write_remote_image_connection(char *snapshot_id, char *path, int flags); + +/* Called by dump/restore when everything is dumped/restored. This function + * creates a new connection with a special control name. The receiver side uses + * it to ack that no more files are coming. + */ +int finish_remote_dump(); +int finish_remote_restore(); + +/* Starts an image proxy daemon (dump side). It receives image files through + * socket connections and forwards them to the image cache (restore side). + */ +int image_proxy(bool background, char *local_proxy_path, char *cache_host, unsigned short cache_port); + +/* Starts an image cache daemon (restore side). It receives image files through + * socket connections and caches them until they are requested by the restore + * process. + */ +int image_cache(bool background, char *local_cache_path, unsigned short cache_port); + +/* Reads (discards) 'len' bytes from fd. This is used to emulate the function + * lseek, which is used to advance the file needle. + */ +int skip_remote_bytes(int fd, unsigned long len); + +/* To support iterative migration, the concept of snapshot_id is introduced + * (only when remote migration is enabled). Each image is tagged with one + * snapshot_id. The snapshot_id is the image directory used for the operation + * that creates the image (either predump or dump). Images stored in memory + * (both in Image Proxy and Image Cache) are identified by their name and + * snapshot_id. Snapshot_ids are ordered so that we can find parent pagemaps + * (that will be used when restoring the process). + */ + +/* Sets the current snapshot_id */ +void init_snapshot_id(char *ns); + +/* Returns the current snapshot_id. 
*/ +char *get_curr_snapshot_id(); + +/* Returns the snapshot_id index representing the current snapshot_id. This + * index represents the hierarchy position. For example: images tagged with + * the snapshot_id with index 1 are more recent than the images tagged with + * the snapshot_id with index 0. + */ +int get_curr_snapshot_id_idx(); + +/* Returns the snapshot_id associated with the snapshot_id index. */ +char *get_snapshot_id_from_idx(int idx); + +/* Pushes the current snapshot_id into the snapshot_id hierarchy (into the Image + * Proxy and Image Cache). + */ +int push_snapshot_id(); + +/* Returns the snapshot id index that preceeds the current snapshot_id. */ +int get_curr_parent_snapshot_id_idx(); + +#endif diff --git a/criu/include/protobuf-desc.h b/criu/include/protobuf-desc.h index 31f5b9a796..696a5800b2 100644 --- a/criu/include/protobuf-desc.h +++ b/criu/include/protobuf-desc.h @@ -61,6 +61,10 @@ enum { PB_AUTOFS, PB_GHOST_CHUNK, PB_FILE, + PB_REMOTE_IMAGE, /* Header for images sent from proxy to cache.*/ + PB_LOCAL_IMAGE, /* Header for reading/writing images from/to proxy or cache. */ + PB_LOCAL_IMAGE_REPLY, /* Header for reading/writing images reply. */ + PB_SNAPSHOT_ID, /* Contains a single id. Used for reading/writing ids from proxy or cache. */ /* PB_AUTOGEN_STOP */ diff --git a/criu/page-xfer.c b/criu/page-xfer.c index 75e135c662..b826861e88 100644 --- a/criu/page-xfer.c +++ b/criu/page-xfer.c @@ -24,6 +24,7 @@ #include "parasite-syscall.h" #include "rst_info.h" #include "stats.h" +#include "img-remote.h" #include "tls.h" static int page_server_sk = -1; @@ -381,13 +382,29 @@ static int open_page_local_xfer(struct page_xfer *xfer, int fd_type, unsigned lo int pfd; int pr_flags = (fd_type == CR_FD_PAGEMAP) ? PR_TASK : PR_SHMEM; - pfd = openat(get_service_fd(IMG_FD_OFF), CR_PARENT_LINK, O_RDONLY); - if (pfd < 0 && errno == ENOENT) - goto out; + + if (opts.remote) { + /* Note: we are replacing a real directory FD for a snapshot_id + * index. 
Since we need the parent of the current snapshot_id, + * we want the current snapshot_id index minus one. It is + * possible that dfd is already a snapshot_id index. We test it + * by comparing it to the service FD. When opening an image (see + * do_open_image) we convert the snapshot_id index into a real + * snapshot_id. + */ + pfd = get_curr_snapshot_id_idx() - 1; + if (pfd < 0) + goto out; + } else { + pfd = openat(get_service_fd(IMG_FD_OFF), CR_PARENT_LINK, O_RDONLY); + if (pfd < 0 && errno == ENOENT) + goto out; + } xfer->parent = xmalloc(sizeof(*xfer->parent)); if (!xfer->parent) { - close(pfd); + if (!opts.remote) + close(pfd); return -1; } @@ -396,10 +413,12 @@ static int open_page_local_xfer(struct page_xfer *xfer, int fd_type, unsigned lo pr_perror("No parent image found, though parent directory is set"); xfree(xfer->parent); xfer->parent = NULL; - close(pfd); + if (!opts.remote) + close(pfd); goto out; } - close(pfd); + if (!opts.remote) + close(pfd); } out: @@ -531,9 +550,16 @@ int check_parent_local_xfer(int fd_type, unsigned long img_id) struct stat st; int ret, pfd; - pfd = openat(get_service_fd(IMG_FD_OFF), CR_PARENT_LINK, O_RDONLY); - if (pfd < 0 && errno == ENOENT) - return 0; + if (opts.remote) { + pfd = get_curr_parent_snapshot_id_idx(); + pr_err("Unable to get parent snapshot id\n"); + if (pfd == -1) + return -1; + } else { + pfd = openat(get_service_fd(IMG_FD_OFF), CR_PARENT_LINK, O_RDONLY); + if (pfd < 0 && errno == ENOENT) + return 0; + } snprintf(path, sizeof(path), imgset_template[fd_type].fmt, img_id); ret = fstatat(pfd, path, &st, 0); @@ -596,6 +622,8 @@ int check_parent_page_xfer(int fd_type, unsigned long img_id) { if (opts.use_page_server) return check_parent_server_xfer(fd_type, img_id); + else if (opts.remote) + return get_curr_parent_snapshot_id_idx() == -1 ? 
0 : 1; else return check_parent_local_xfer(fd_type, img_id); } diff --git a/criu/pagemap.c b/criu/pagemap.c index 05f6b82b8e..8ef5bceda3 100644 --- a/criu/pagemap.c +++ b/criu/pagemap.c @@ -18,6 +18,7 @@ #include "xmalloc.h" #include "protobuf.h" #include "images/pagemap.pb-c.h" +#include "img-remote.h" #ifndef SEEK_DATA #define SEEK_DATA 3 @@ -143,8 +144,12 @@ static void skip_pagemap_pages(struct page_read *pr, unsigned long len) if (!len) return; - if (pagemap_present(pr->pe)) + if (pagemap_present(pr->pe)) { + if (opts.remote) + if (skip_remote_bytes(img_raw_fd(pr->pi), len)) + pr_perror("Error skipping remote bytes"); pr->pi_off += len; + } pr->cvaddr += len; } @@ -161,7 +166,7 @@ static int seek_pagemap(struct page_read *pr, unsigned long vaddr) break; if (vaddr >= start && vaddr < end) { - skip_pagemap_pages(pr, vaddr - pr->cvaddr); + skip_pagemap_pages(pr, vaddr > pr->cvaddr ? vaddr - pr->cvaddr : 0); return 1; } @@ -393,7 +398,7 @@ static int maybe_read_page_local(struct page_read *pr, unsigned long vaddr, * for us for urgent async read, just do the regular * cached read. */ - if ((flags & (PR_ASYNC|PR_ASAP)) == PR_ASYNC) + if ((flags & (PR_ASYNC|PR_ASAP)) == PR_ASYNC && !opts.remote) ret = pagemap_enqueue_iovec(pr, buf, len, &pr->async); else { ret = read_local_page(pr, vaddr, len, buf); @@ -601,9 +606,24 @@ static int try_open_parent(int dfd, unsigned long id, struct page_read *pr, int int pfd, ret; struct page_read *parent = NULL; - pfd = openat(dfd, CR_PARENT_LINK, O_RDONLY); - if (pfd < 0 && errno == ENOENT) - goto out; + if (opts.remote) { + /* Note: we are replacing a real directory FD for a snapshot_id + * index. Since we need the parent of the current snapshot_id, + * we want the current snapshot_id index minus one. It is + * possible that dfd is already a snapshot_id index. We test it + * by comparing it to the service FD. When opening an image (see + * do_open_image) we convert the snapshot_id index into a real + * snapshot_id. 
+ */ + pfd = dfd == get_service_fd(IMG_FD_OFF) ? + get_curr_snapshot_id_idx() - 1 : dfd - 1; + if (pfd < 0) + goto out; + } else { + pfd = openat(dfd, CR_PARENT_LINK, O_RDONLY); + if (pfd < 0 && errno == ENOENT) + goto out; + } parent = xmalloc(sizeof(*parent)); if (!parent) @@ -618,7 +638,8 @@ static int try_open_parent(int dfd, unsigned long id, struct page_read *pr, int parent = NULL; } - close(pfd); + if (!opts.remote) + close(pfd); out: pr->parent = parent; return 0; @@ -626,7 +647,8 @@ static int try_open_parent(int dfd, unsigned long id, struct page_read *pr, int err_free: xfree(parent); err_cl: - close(pfd); + if (!opts.remote) + close(pfd); return -1; } @@ -657,7 +679,18 @@ static int init_pagemaps(struct page_read *pr) off_t fsize; int nr_pmes, nr_realloc; - fsize = img_raw_size(pr->pmi); + if (!opts.remote) + fsize = img_raw_size(pr->pmi); + else + /* + * FIXME - There is no easy way to estimate the size of the + * pagemap that is still to be read from the socket. Possible + * solution is to ask Image Proxy or Image Cache about the size + * of the image. 1024 is a wild guess (more space is allocated + * if needed). 
+ */ + fsize = 1024; + if (fsize < 0) return -1; diff --git a/criu/protobuf-desc.c b/criu/protobuf-desc.c index 41c2080372..bfe00c561a 100644 --- a/criu/protobuf-desc.c +++ b/criu/protobuf-desc.c @@ -62,6 +62,7 @@ #include "images/seccomp.pb-c.h" #include "images/binfmt-misc.pb-c.h" #include "images/autofs.pb-c.h" +#include "images/remote-image.pb-c.h" struct cr_pb_message_desc cr_pb_descs[PB_MAX]; diff --git a/criu/util.c b/criu/util.c index f8fd0332b6..1933b29ac5 100644 --- a/criu/util.c +++ b/criu/util.c @@ -425,7 +425,7 @@ int copy_file(int fd_in, int fd_out, size_t bytes) ssize_t ret; while (1) { - if (false) { + if (opts.remote) { ret = read(fd_in, buffer, chunk); if (ret < 0) { pr_perror("Can't read from fd_in\n"); diff --git a/images/Makefile b/images/Makefile index edaab06338..4de6990b46 100644 --- a/images/Makefile +++ b/images/Makefile @@ -63,6 +63,7 @@ proto-obj-y += sysctl.o proto-obj-y += autofs.o proto-obj-y += macvlan.o proto-obj-y += sit.o +proto-obj-y += remote-image.o CFLAGS += -iquote $(obj)/ diff --git a/images/remote-image.proto b/images/remote-image.proto new file mode 100644 index 0000000000..1212627e39 --- /dev/null +++ b/images/remote-image.proto @@ -0,0 +1,20 @@ +message local_image_entry { + required string name = 1; + required string snapshot_id = 2; + required uint32 open_mode = 3; +} + +message remote_image_entry { + required string name = 1; + required string snapshot_id = 2; + required uint32 open_mode = 3; + required uint64 size = 4; +} + +message local_image_reply_entry { + required uint32 error = 1; +} + +message snapshot_id_entry { + required string snapshot_id = 1; +} From 87fa36076f3da09fb30f1609545afb6371b60229 Mon Sep 17 00:00:00 2001 From: "rbruno@gsd.inesc-id.pt" Date: Sat, 11 Feb 2017 04:34:44 +0100 Subject: [PATCH 004/277] Process Migration using Sockets (p2) The current patch brings the implementation of the image proxy and image cache. 
These components are necessary to perform in-memory live migration of processes using CRIU. The image proxy receives images from CRIU Dump/Pre-Dump (through UNIX sockets) and forwards them to the image cache (through a TCP socket). The image cache caches images in memory and sends them to CRIU Restore (through UNIX sockets) when requested. Signed-off-by: Rodrigo Bruno Signed-off-by: Pavel Emelyanov --- criu/img-cache.c | 154 +++++++ criu/img-proxy.c | 90 ++++ criu/img-remote-proto.c | 742 ++++++++++++++++++++++++++++++++ criu/include/criu-log.h | 2 + criu/include/img-remote-proto.h | 88 ++++ criu/shmem.c | 2 +- 6 files changed, 1077 insertions(+), 1 deletion(-) create mode 100644 criu/img-cache.c create mode 100644 criu/img-proxy.c create mode 100644 criu/img-remote-proto.c create mode 100644 criu/include/img-remote-proto.h diff --git a/criu/img-cache.c b/criu/img-cache.c new file mode 100644 index 0000000000..293597088d --- /dev/null +++ b/criu/img-cache.c @@ -0,0 +1,154 @@ +#include + +#include "img-remote-proto.h" +#include "criu-log.h" +#include +#include +#include +#include +#include "cr_options.h" + +static struct rimage *wait_for_image(struct wthread *wt) +{ + struct rimage *result; + + if (!strncmp(wt->path, RESTORE_FINISH, sizeof(RESTORE_FINISH))) { + finished = true; + shutdown(local_req_fd, SHUT_RD); + return NULL; + } + + result = get_rimg_by_name(wt->snapshot_id, wt->path); + if (result != NULL && result->size > 0) + return result; + + /* The file does not exist and we do not expect new files */ + if (finished && !is_receiving()) + return NULL; + + /* NOTE: at this point, when the thread wakes up, either the image is + * already in memory or it will never come (the dump is finished). + */ + sem_wait(&(wt->wakeup_sem)); + result = get_rimg_by_name(wt->snapshot_id, wt->path); + if (result != NULL && result->size > 0) + return result; + else + return NULL; +} + +/* The image cache creates a thread that calls this function.
It waits for remote + * images from the image proxy. + */ +void *accept_remote_image_connections(void *port) +{ + int fd = *((int *) port); + struct sockaddr_in cli_addr; + socklen_t clilen = sizeof(cli_addr); + char snapshot_id_buf[PATHLEN], path_buf[PATHLEN]; + uint64_t size; + int64_t ret; + int flags, proxy_fd; + struct rimage *rimg; + + proxy_fd = accept(fd, (struct sockaddr *) &cli_addr, &clilen); + if (proxy_fd < 0) { + pr_perror("Unable to accept remote image connection from image proxy"); + return NULL; + } + while (1) { + ret = read_remote_header(proxy_fd, snapshot_id_buf, path_buf, &flags, &size); + if (ret < 0) { + pr_perror("Unable to receive remote header from image proxy"); + return NULL; + } + /* This means that no more images are coming. */ + else if (!ret) { + pr_info("Image Proxy connection closed.\n"); + finished = true; + unlock_workers(); + return NULL; + } + + pr_info("Received %s request for %s:%s\n", + flags == O_RDONLY ? "read" : + flags == O_APPEND ? "append" : "write", + path_buf, snapshot_id_buf); + + rimg = prepare_remote_image(path_buf, snapshot_id_buf, flags); + + prepare_recv_rimg(); + if (!size) + ret = 0; + else + ret = recv_image(proxy_fd, rimg, size, flags, false); + if (ret < 0) { + pr_perror("Unable to receive %s:%s from image proxy", + rimg->path, rimg->snapshot_id); + finalize_recv_rimg(NULL); + return NULL; + } else if (ret != size) { + pr_perror("Unable to receive %s:%s from image proxy (received %ld bytes, expected %lu bytes)", + rimg->path, rimg->snapshot_id, (long)ret, (unsigned long)size); + finalize_recv_rimg(NULL); + return NULL; + } + finalize_recv_rimg(rimg); + + pr_info("Finished receiving %s:%s (received %ld bytes)\n", + rimg->path, rimg->snapshot_id, (long)ret); + } +} + +int image_cache(bool background, char *local_cache_path, unsigned short cache_write_port) +{ + pthread_t local_req_thr, remote_req_thr; + + pr_info("Proxy to Cache Port %d, CRIU to Cache Path %s\n", + cache_write_port, local_cache_path); +
+ + if (opts.ps_socket != -1) { + proxy_to_cache_fd = opts.ps_socket; + pr_info("Re-using ps socket %d\n", proxy_to_cache_fd); + } else { + proxy_to_cache_fd = setup_TCP_server_socket(cache_write_port); + if (proxy_to_cache_fd < 0) { + pr_perror("Unable to open proxy to cache TCP socket"); + return -1; + } + } + + local_req_fd = setup_UNIX_server_socket(local_cache_path); + if (local_req_fd < 0) { + pr_perror("Unable to open cache to proxy UNIX socket"); + return -1; + } + + if (init_daemon(background, wait_for_image)) { + pr_perror("Unable to initialize daemon"); + return -1; + } + + if (pthread_create( + &remote_req_thr, + NULL, accept_remote_image_connections, + (void *) &proxy_to_cache_fd)) { + pr_perror("Unable to create remote requests thread"); + return -1; + } + if (pthread_create( + &local_req_thr, + NULL, + accept_local_image_connections, + (void *) &local_req_fd)) { + pr_perror("Unable to create local requests thread"); + return -1; + } + + pthread_join(remote_req_thr, NULL); + pthread_join(local_req_thr, NULL); + join_workers(); + pr_info("Finished image cache."); + return 0; +} diff --git a/criu/img-proxy.c b/criu/img-proxy.c new file mode 100644 index 0000000000..58123dccb7 --- /dev/null +++ b/criu/img-proxy.c @@ -0,0 +1,90 @@ +#include + +#include "img-remote.h" +#include "img-remote-proto.h" +#include "criu-log.h" +#include +#include +#include +#include "cr_options.h" + +static struct rimage *wait_for_image(struct wthread *wt) +{ + return get_rimg_by_name(wt->snapshot_id, wt->path); +} + +int64_t forward_image(struct rimage *rimg) +{ + int64_t ret; + int fd = proxy_to_cache_fd; + + pthread_mutex_lock(&(rimg->in_use)); + pr_info("Forwarding %s:%s (%lu bytes)\n", + rimg->path, rimg->snapshot_id, (unsigned long)rimg->size); + if (write_remote_header( + fd, rimg->snapshot_id, rimg->path, O_APPEND, rimg->size) < 0) { + pr_perror("Error writing header for %s:%s", + rimg->path, rimg->snapshot_id); + pthread_mutex_unlock(&(rimg->in_use)); + return -1; + } + 
+ ret = send_image(fd, rimg, O_APPEND, false); + if (ret < 0) { + pr_perror("Unable to send %s:%s to image cache", + rimg->path, rimg->snapshot_id); + pthread_mutex_unlock(&(rimg->in_use)); + return -1; + } else if (ret != rimg->size) { + pr_perror("Unable to send %s:%s to image proxy (sent %ld bytes, expected %lu bytes", + rimg->path, rimg->snapshot_id, (long)ret, (unsigned long)rimg->size); + pthread_mutex_unlock(&(rimg->in_use)); + return -1; + } + pr_info("Finished forwarding %s:%s (sent %lu bytes)\n", + rimg->path, rimg->snapshot_id, (unsigned long)rimg->size); + pthread_mutex_unlock(&(rimg->in_use)); + return ret; +} + +int image_proxy(bool background, char *local_proxy_path, char *fwd_host, unsigned short fwd_port) +{ + pthread_t local_req_thr; + + pr_info("CRIU to Proxy Path: %s, Cache Address %s:%hu\n", + local_proxy_path, fwd_host, fwd_port); + + local_req_fd = setup_UNIX_server_socket(local_proxy_path); + if (local_req_fd < 0) { + pr_perror("Unable to open CRIU to proxy UNIX socket"); + return -1; + } + + if (opts.ps_socket != -1) { + proxy_to_cache_fd = opts.ps_socket; + pr_info("Re-using ps socket %d\n", proxy_to_cache_fd); + } else { + proxy_to_cache_fd = setup_TCP_client_socket(fwd_host, fwd_port); + if (proxy_to_cache_fd < 0) { + pr_perror("Unable to open proxy to cache TCP socket"); + return -1; + } + } + + if (init_daemon(background, wait_for_image)) + return -1; + + if (pthread_create( + &local_req_thr, + NULL, + accept_local_image_connections, + (void *) &local_req_fd)) { + pr_perror("Unable to create local requests thread"); + return -1; + } + + pthread_join(local_req_thr, NULL); + join_workers(); + pr_info("Finished image proxy."); + return 0; +} diff --git a/criu/img-remote-proto.c b/criu/img-remote-proto.c new file mode 100644 index 0000000000..ad39c42e75 --- /dev/null +++ b/criu/img-remote-proto.c @@ -0,0 +1,742 @@ +#include +#include + +#include +#include +#include +#include +#include "sys/un.h" +#include +#include +#include + +#include 
"img-remote-proto.h" +#include "criu-log.h" +#include "common/compiler.h" + +#include "protobuf.h" +#include "images/remote-image.pb-c.h" +#include "image.h" + +LIST_HEAD(rimg_head); +pthread_mutex_t rimg_lock; + +pthread_mutex_t proxy_to_cache_lock; + +LIST_HEAD(workers_head); +pthread_mutex_t workers_lock; +sem_t workers_semph; + +struct rimage * (*wait_for_image) (struct wthread *wt); + +bool finished = false; +int writing = 0; +int forwarding = 0; +int proxy_to_cache_fd; +int local_req_fd; + +struct rimage *get_rimg_by_name(const char *snapshot_id, const char *path) +{ + struct rimage *rimg = NULL; + + pthread_mutex_lock(&rimg_lock); + list_for_each_entry(rimg, &rimg_head, l) { + if (!strncmp(rimg->path, path, PATHLEN) && + !strncmp(rimg->snapshot_id, snapshot_id, PATHLEN)) { + pthread_mutex_unlock(&rimg_lock); + return rimg; + } + } + pthread_mutex_unlock(&rimg_lock); + return NULL; +} + +static struct wthread *get_wt_by_name(const char *snapshot_id, const char *path) +{ + struct wthread *wt = NULL; + + pthread_mutex_lock(&workers_lock); + list_for_each_entry(wt, &workers_head, l) { + if (!strncmp(wt->path, path, PATHLEN) && + !strncmp(wt->snapshot_id, snapshot_id, PATHLEN)) { + pthread_mutex_unlock(&workers_lock); + return wt; + } + } + pthread_mutex_unlock(&workers_lock); + return NULL; +} + +static int init_sync_structures(void) +{ + if (pthread_mutex_init(&rimg_lock, NULL) != 0) { + pr_perror("Remote image list mutex init failed"); + return -1; + } + + if (pthread_mutex_init(&proxy_to_cache_lock, NULL) != 0) { + pr_perror("Remote image connection mutex init failed"); + return -1; + } + + if (pthread_mutex_init(&workers_lock, NULL) != 0) { + pr_perror("Workers mutex init failed"); + return -1; + } + + if (sem_init(&workers_semph, 0, 0) != 0) { + pr_perror("Workers semaphore init failed"); + return -1; + } + + return 0; +} + +void prepare_recv_rimg(void) +{ + pthread_mutex_lock(&rimg_lock); + writing++; + pthread_mutex_unlock(&rimg_lock); +} + +void 
finalize_recv_rimg(struct rimage *rimg) +{ + + pthread_mutex_lock(&rimg_lock); + + if (rimg) + list_add_tail(&(rimg->l), &rimg_head); + writing--; + pthread_mutex_unlock(&rimg_lock); + /* Wake thread waiting for this image. */ + if (rimg) { + struct wthread *wt = get_wt_by_name(rimg->snapshot_id, rimg->path); + if (wt) + sem_post(&(wt->wakeup_sem)); + } +} + +bool is_receiving(void) +{ + int ret; + + pthread_mutex_lock(&rimg_lock); + ret = writing; + pthread_mutex_unlock(&rimg_lock); + return ret > 0; +} + +static void prepare_fwd_rimg(void) +{ + pthread_mutex_lock(&rimg_lock); + forwarding++; + pthread_mutex_unlock(&rimg_lock); +} + +static void finalize_fwd_rimg(void) +{ + pthread_mutex_lock(&rimg_lock); + forwarding--; + pthread_mutex_unlock(&rimg_lock); +} + +static bool is_forwarding(void) +{ + int ret; + + pthread_mutex_lock(&rimg_lock); + ret = forwarding; + pthread_mutex_unlock(&rimg_lock); + return ret > 0; +} + +/* This function is called when no more images are coming. Threads still waiting + * for images will be awakened to send an ENOENT (no such file) to the requester.
+ */ +void unlock_workers(void) +{ + struct wthread *wt = NULL; + + pthread_mutex_lock(&workers_lock); + list_for_each_entry(wt, &workers_head, l) + sem_post(&(wt->wakeup_sem)); + pthread_mutex_unlock(&workers_lock); +} + +int init_daemon(bool background, struct rimage *(*wfi)(struct wthread*)) +{ + if (background) { + if (daemon(1, 0) == -1) { + pr_perror("Can't run service server in the background"); + return -1; + } + } + wait_for_image = wfi; + return init_sync_structures(); +} + +int setup_TCP_server_socket(int port) +{ + struct sockaddr_in serv_addr; + int sockopt = 1; + int sockfd = socket(AF_INET, SOCK_STREAM, 0); + + if (sockfd < 0) { + pr_perror("Unable to open image socket"); + return -1; + } + + bzero((char *) &serv_addr, sizeof(serv_addr)); + serv_addr.sin_family = AF_INET; + serv_addr.sin_addr.s_addr = INADDR_ANY; + serv_addr.sin_port = htons(port); + + if (setsockopt( + sockfd, SOL_SOCKET, SO_REUSEADDR, &sockopt, sizeof(sockopt)) == -1) { + pr_perror("Unable to set SO_REUSEADDR"); + return -1; + } + + if (bind(sockfd, (struct sockaddr *) &serv_addr, sizeof(serv_addr)) < 0) { + pr_perror("Unable to bind image socket"); + return -1; + } + + if (listen(sockfd, DEFAULT_LISTEN)) { + pr_perror("Unable to listen image socket"); + return -1; + } + + return sockfd; +} + +int setup_TCP_client_socket(char *hostname, int port) +{ + int sockfd; + struct sockaddr_in serv_addr; + struct hostent *server; + + sockfd = socket(AF_INET, SOCK_STREAM, 0); + if (sockfd < 0) { + pr_perror("Unable to open remote image socket"); + return -1; + } + + server = gethostbyname(hostname); + if (server == NULL) { + pr_perror("Unable to get host by name (%s)", hostname); + return -1; + } + + bzero((char *) &serv_addr, sizeof(serv_addr)); + serv_addr.sin_family = AF_INET; + bcopy((char *) server->h_addr, + (char *) &serv_addr.sin_addr.s_addr, + server->h_length); + serv_addr.sin_port = htons(port); + + if (connect(sockfd, (struct sockaddr *) &serv_addr, sizeof(serv_addr)) < 0) { + 
pr_perror("Unable to connect to remote %s", hostname); + return -1; + } + + return sockfd; +} + +int setup_UNIX_server_socket(char *path) +{ + struct sockaddr_un addr; + int sockfd = socket(AF_UNIX, SOCK_STREAM, 0); + + if (sockfd < 0) { + pr_perror("Unable to open image socket"); + return -1; + } + + memset(&addr, 0, sizeof(addr)); + addr.sun_family = AF_UNIX; + strncpy(addr.sun_path, path, sizeof(addr.sun_path)-1); + + unlink(path); + + if (bind(sockfd, (struct sockaddr *)&addr, sizeof(addr)) == -1) { + pr_perror("Unable to bind image socket"); + return -1; + } + + if (listen(sockfd, 50) == -1) { + pr_perror("Unable to listen image socket"); + return -1; + } + + return sockfd; +} + +int setup_UNIX_client_socket(char *path) +{ + struct sockaddr_un addr; + int sockfd = socket(AF_UNIX, SOCK_STREAM, 0); + + if (sockfd < 0) { + pr_perror("Unable to open local image socket"); + return -1; + } + + memset(&addr, 0, sizeof(addr)); + addr.sun_family = AF_UNIX; + strncpy(addr.sun_path, path, sizeof(addr.sun_path)-1); + + if (connect(sockfd, (struct sockaddr *)&addr, sizeof(addr)) < 0) { + pr_perror("Unable to connect to local socket: %s", path); + close(sockfd); + return -1; + } + + return sockfd; +} + +int64_t pb_write_obj(int fd, void *obj, int type) +{ + struct cr_img img; + + img._x.fd = fd; + bfd_setraw(&img._x); + return pb_write_one(&img, obj, type); +} + +int64_t pb_read_obj(int fd, void **pobj, int type) +{ + struct cr_img img; + + img._x.fd = fd; + bfd_setraw(&img._x); + return do_pb_read_one(&img, pobj, type, true); +} + +int64_t write_header(int fd, char *snapshot_id, char *path, int flags) +{ + LocalImageEntry li = LOCAL_IMAGE_ENTRY__INIT; + + li.name = path; + li.snapshot_id = snapshot_id; + li.open_mode = flags; + return pb_write_obj(fd, &li, PB_LOCAL_IMAGE); +} + +int64_t write_reply_header(int fd, int error) +{ + LocalImageReplyEntry lir = LOCAL_IMAGE_REPLY_ENTRY__INIT; + + lir.error = error; + return pb_write_obj(fd, &lir, PB_LOCAL_IMAGE_REPLY); +} + 
+int64_t write_remote_header(int fd, char *snapshot_id, char *path, int flags, uint64_t size) +{ + RemoteImageEntry ri = REMOTE_IMAGE_ENTRY__INIT; + + ri.name = path; + ri.snapshot_id = snapshot_id; + ri.open_mode = flags; + ri.size = size; + return pb_write_obj(fd, &ri, PB_REMOTE_IMAGE); +} + +int64_t read_header(int fd, char *snapshot_id, char *path, int *flags) +{ + LocalImageEntry *li; + int ret = pb_read_obj(fd, (void **)&li, PB_LOCAL_IMAGE); + + if (ret > 0) { + strncpy(snapshot_id, li->snapshot_id, PATHLEN); + strncpy(path, li->name, PATHLEN); + *flags = li->open_mode; + } + free(li); + return ret; +} + +int64_t read_reply_header(int fd, int *error) +{ + LocalImageReplyEntry *lir; + int ret = pb_read_obj(fd, (void **)&lir, PB_LOCAL_IMAGE_REPLY); + + if (ret > 0) + *error = lir->error; + free(lir); + return ret; +} + +int64_t read_remote_header(int fd, char *snapshot_id, char *path, int *flags, uint64_t *size) +{ + RemoteImageEntry *ri; + int ret = pb_read_obj(fd, (void **)&ri, PB_REMOTE_IMAGE); + + if (ret > 0) { + strncpy(snapshot_id, ri->snapshot_id, PATHLEN); + strncpy(path, ri->name, PATHLEN); + *flags = ri->open_mode; + *size = ri->size; + } + free(ri); + return ret; +} + +static struct wthread *new_worker(void) +{ + struct wthread *wt = malloc(sizeof(struct wthread)); + + if (!wt) { + pr_perror("Unable to allocate worker thread structure"); + return NULL; + } + if (sem_init(&(wt->wakeup_sem), 0, 0) != 0) { + pr_perror("Workers semaphore init failed"); + return NULL; + } + return wt; +} + +static void add_worker(struct wthread *wt) +{ + pthread_mutex_lock(&workers_lock); + list_add_tail(&(wt->l), &workers_head); + pthread_mutex_unlock(&workers_lock); + sem_post(&workers_semph); +} + +void join_workers(void) +{ + struct wthread *wthread = NULL; + + while (! 
list_empty(&workers_head)) { + wthread = list_entry(workers_head.next, struct wthread, l); + pthread_join(wthread->tid, NULL); + list_del(&(wthread->l)); + free(wthread); + } +} + +static struct rimage *new_remote_image(char *path, char *snapshot_id) +{ + struct rimage *rimg = malloc(sizeof(struct rimage)); + struct rbuf *buf = malloc(sizeof(struct rbuf)); + + if (rimg == NULL) { + pr_perror("Unable to allocate remote_image structures"); + return NULL; + } + + if (buf == NULL) { + pr_perror("Unable to allocate remote_buffer structures"); + return NULL; + } + + strncpy(rimg->path, path, PATHLEN); + strncpy(rimg->snapshot_id, snapshot_id, PATHLEN); + rimg->size = 0; + buf->nbytes = 0; + INIT_LIST_HEAD(&(rimg->buf_head)); + list_add_tail(&(buf->l), &(rimg->buf_head)); + rimg->curr_sent_buf = list_entry(rimg->buf_head.next, struct rbuf, l); + rimg->curr_sent_bytes = 0; + + if (pthread_mutex_init(&(rimg->in_use), NULL) != 0) { + pr_perror("Remote image in_use mutex init failed"); + return NULL; + } + return rimg; +} + +/* Clears a remote image struct for reusing it. */ +static struct rimage *clear_remote_image(struct rimage *rimg) +{ + pthread_mutex_lock(&(rimg->in_use)); + + while (!list_is_singular(&(rimg->buf_head))) { + struct rbuf *buf = list_entry(rimg->buf_head.prev, struct rbuf, l); + + list_del(rimg->buf_head.prev); + free(buf); + } + + list_entry(rimg->buf_head.next, struct rbuf, l)->nbytes = 0; + rimg->size = 0; + rimg->curr_sent_buf = list_entry(rimg->buf_head.next, struct rbuf, l); + rimg->curr_sent_bytes = 0; + + pthread_mutex_unlock(&(rimg->in_use)); + + return rimg; +} + +struct rimage *prepare_remote_image(char *path, char *snapshot_id, int open_mode) +{ + struct rimage *rimg = get_rimg_by_name(snapshot_id, path); + /* There is no record of such image, create a new one. 
*/ + + if (rimg == NULL) + return new_remote_image(path, snapshot_id); + + pthread_mutex_lock(&rimg_lock); + list_del(&(rimg->l)); + pthread_mutex_unlock(&rimg_lock); + + /* There is already an image record. Simply return it for appending. */ + if (open_mode == O_APPEND) + return rimg; + /* There is already an image record. Clear it for writing. */ + else + return clear_remote_image(rimg); +} + +void *process_local_read(struct wthread *wt) +{ + struct rimage *rimg = NULL; + int64_t ret; + /* TODO - split wait_for_image + * in cache - improve the parent stuf + * in proxy - do not wait for anything, return no file + */ + rimg = wait_for_image(wt); + if (!rimg) { + pr_info("No image %s:%s.\n", wt->path, wt->snapshot_id); + if (write_reply_header(wt->fd, ENOENT) < 0) + pr_perror("Error writing reply header for unexisting image"); + close(wt->fd); + return NULL; + } else { + if (write_reply_header(wt->fd, 0) < 0) { + pr_perror("Error writing reply header for %s:%s", + wt->path, wt->snapshot_id); + close(wt->fd); + return NULL; + } + } + + pthread_mutex_lock(&(rimg->in_use)); + ret = send_image(wt->fd, rimg, wt->flags, true); + if (ret < 0) + pr_perror("Unable to send %s:%s to CRIU (sent %ld bytes)", + rimg->path, rimg->snapshot_id, (long)ret); + else + pr_info("Finished sending %s:%s to CRIU (sent %ld bytes)\n", + rimg->path, rimg->snapshot_id, (long)ret); + pthread_mutex_unlock(&(rimg->in_use)); + return NULL; +} + +static void *process_local_image_connection(void *ptr) +{ + struct wthread *wt = (struct wthread *) ptr; + struct rimage *rimg = NULL; + int64_t ret; + + /* NOTE: the code inside this if is shared for both cache and proxy. */ + if (wt->flags == O_RDONLY) + return process_local_read(wt); + + /* NOTE: IMAGE PROXY ONLY. The image cache receives write connections + * through TCP (see accept_remote_image_connections). 
+ */ + rimg = prepare_remote_image(wt->path, wt->snapshot_id, wt->flags); + ret = recv_image(wt->fd, rimg, 0, wt->flags, true); + if (ret < 0) { + pr_perror("Unable to receive %s:%s to CRIU (received %ld bytes)", + rimg->path, rimg->snapshot_id, (long)ret); + finalize_recv_rimg(NULL); + return NULL; + } + finalize_recv_rimg(rimg); + pr_info("Finished receiving %s:%s (received %ld bytes)\n", + rimg->path, rimg->snapshot_id, (long)ret); + + + if (!strncmp(rimg->path, DUMP_FINISH, sizeof(DUMP_FINISH))) { + finished = true; + shutdown(local_req_fd, SHUT_RD); + } else { + pthread_mutex_lock(&proxy_to_cache_lock); + ret = forward_image(rimg); + pthread_mutex_unlock(&proxy_to_cache_lock); + } + + finalize_fwd_rimg(); + if (ret < 0) { + pr_perror("Unable to forward %s:%s to Image Cache", + rimg->path, rimg->snapshot_id); + + return NULL; + } + + if (finished && !is_forwarding() && !is_receiving()) { + pr_info("Closing connection to Image Cache.\n"); + close(proxy_to_cache_fd); + unlock_workers(); + } + return NULL; +} + + +void *accept_local_image_connections(void *port) +{ + int fd = *((int *) port); + int cli_fd; + struct sockaddr_in cli_addr; + + socklen_t clilen = sizeof(cli_addr); + pthread_t tid; + struct wthread *wt; + + while (1) { + cli_fd = accept(fd, (struct sockaddr *) &cli_addr, &clilen); + if (cli_fd < 0) { + if (!finished) + pr_err("Unable to accept local image connection"); + close(cli_fd); + return NULL; + } + + wt = new_worker(); + wt->fd = cli_fd; + + if (read_header(wt->fd, wt->snapshot_id, wt->path, &(wt->flags)) < 0) { + pr_perror("Error reading local image header"); + return NULL; + } + + pr_info("Received %s request for %s:%s\n", + wt->flags == O_RDONLY ? "read" : + wt->flags == O_APPEND ? "append" : "write", + wt->path, wt->snapshot_id); + + /* These function calls are used to avoid other threads from + * thinking that there are no more images are coming. 
+ */ + if (wt->flags != O_RDONLY) { + prepare_recv_rimg(); + prepare_fwd_rimg(); + } + + /* We need to flock the last pid file to avoid stealing pids + * from restore. + */ + int fd = open_proc_rw(PROC_GEN, LAST_PID_PATH); + if (fd < 0) { + pr_perror("Can't open %s", LAST_PID_PATH); + } + + if (flock(fd, LOCK_EX)) { + close(fd); + pr_perror("Can't lock %s", LAST_PID_PATH); + return NULL; + } + + if (pthread_create( + &tid, NULL, process_local_image_connection, (void *) wt)) { + pr_perror("Unable to create worker thread"); + return NULL; + } + + if (flock(fd, LOCK_UN)) + pr_perror("Can't unlock %s", LAST_PID_PATH); + close(fd); + + wt->tid = tid; + add_worker(wt); + } +} + +/* Note: size is a limit on how much we want to read from the socket. Zero means + * read until the socket is closed. + */ +int64_t recv_image(int fd, struct rimage *rimg, uint64_t size, int flags, bool close_fd) +{ + struct rbuf *curr_buf = NULL; + int n; + + if (flags == O_APPEND) + curr_buf = list_entry(rimg->buf_head.prev, struct rbuf, l); + else + curr_buf = list_entry(rimg->buf_head.next, struct rbuf, l); + + while (1) { + n = read(fd, + curr_buf->buffer + curr_buf->nbytes, + size ? 
+ min((int) (size - rimg->size), BUF_SIZE - curr_buf->nbytes) : + BUF_SIZE - curr_buf->nbytes); + if (n == 0) { + if (close_fd) + close(fd); + return rimg->size; + } else if (n > 0) { + curr_buf->nbytes += n; + rimg->size += n; + if (curr_buf->nbytes == BUF_SIZE) { + struct rbuf *buf = malloc(sizeof(struct rbuf)); + if (buf == NULL) { + pr_perror("Unable to allocate remote_buffer structures"); + if (close_fd) + close(fd); + return -1; + } + buf->nbytes = 0; + list_add_tail(&(buf->l), &(rimg->buf_head)); + curr_buf = buf; + } + if (size && rimg->size == size) { + if (close_fd) + close(fd); + return rimg->size; + } + } else { + pr_perror("Read on %s:%s socket failed", + rimg->path, rimg->snapshot_id); + if (close_fd) + close(fd); + return -1; + } + } +} + +int64_t send_image(int fd, struct rimage *rimg, int flags, bool close_fd) +{ + + int n, nblocks = 0; + + if (flags != O_APPEND) { + rimg->curr_sent_buf = list_entry(rimg->buf_head.next, struct rbuf, l); + rimg->curr_sent_bytes = 0; + } + + while (1) { + n = send( + fd, + rimg->curr_sent_buf->buffer + rimg->curr_sent_bytes, + min(BUF_SIZE, rimg->curr_sent_buf->nbytes) - rimg->curr_sent_bytes, + MSG_NOSIGNAL); + if (n > -1) { + rimg->curr_sent_bytes += n; + if (rimg->curr_sent_bytes == BUF_SIZE) { + rimg->curr_sent_buf = + list_entry(rimg->curr_sent_buf->l.next, struct rbuf, l); + nblocks++; + rimg->curr_sent_bytes = 0; + } else if (rimg->curr_sent_bytes == rimg->curr_sent_buf->nbytes) { + if (close_fd) + close(fd); + return nblocks*BUF_SIZE + rimg->curr_sent_buf->nbytes; + } + } else if (errno == EPIPE || errno == ECONNRESET) { + pr_warn("Connection for %s:%s was closed early than expected\n", + rimg->path, rimg->snapshot_id); + return 0; + } else { + pr_perror("Write on %s:%s socket failed", + rimg->path, rimg->snapshot_id); + return -1; + } + } + +} diff --git a/criu/include/criu-log.h b/criu/include/criu-log.h index c2a635ba76..21ef543079 100644 --- a/criu/include/criu-log.h +++ b/criu/include/criu-log.h @@ -22,6 
+22,8 @@ #include "log.h" +struct timeval; + extern int log_init(const char *output); extern void log_fini(void); extern int log_init_by_pid(pid_t pid); diff --git a/criu/include/img-remote-proto.h b/criu/include/img-remote-proto.h new file mode 100644 index 0000000000..13cf6c6d25 --- /dev/null +++ b/criu/include/img-remote-proto.h @@ -0,0 +1,88 @@ +#ifndef IMAGE_REMOTE_PVT_H +#define IMAGE_REMOTE_PVT_H + +#include +#include +#include "common/list.h" +#include "img-remote.h" +#include +#include + +#define DEFAULT_LISTEN 50 +#ifndef PAGESIZE +#define PAGESIZE 4096 +#endif +#define BUF_SIZE PAGESIZE + +struct rbuf { + char buffer[BUF_SIZE]; + int nbytes; /* How many bytes are in the buffer. */ + struct list_head l; +}; + +struct rimage { + char path[PATHLEN]; + char snapshot_id[PATHLEN]; + struct list_head l; + struct list_head buf_head; + /* Used to track already sent buffers when the image is appended. */ + struct rbuf *curr_sent_buf; + /* Similar to the previous field. Number of bytes sent in 'curr_sent_buf'. */ + int curr_sent_bytes; + uint64_t size; /* number of bytes */ + pthread_mutex_t in_use; /* Only one operation at a time, per image. */ +}; + +struct wthread { + pthread_t tid; + struct list_head l; + /* Client fd. */ + int fd; + /* The path and snapshot_id identify the request handled by this thread. */ + char path[PATHLEN]; + char snapshot_id[PATHLEN]; + int flags; + /* This semaphore is used to wake this thread if the image is in memory.*/ + sem_t wakeup_sem; +}; + +/* This variable is used to indicate when the dump is finished. */ +extern bool finished; +/* This is the proxy to cache TCP socket FD. */ +extern int proxy_to_cache_fd; +/* This is the unix socket used to fulfill local requests.
*/ +extern int local_req_fd; + +int init_daemon(bool background, struct rimage *(*wfi)(struct wthread*)); + +void join_workers(void); +void unlock_workers(void); + +void prepare_recv_rimg(void); +void finalize_recv_rimg(struct rimage *rimg); +struct rimage *prepare_remote_image(char *path, char *namesapce, int flags); +struct rimage *get_rimg_by_name(const char *snapshot_id, const char *path); +bool is_receiving(void); + +void *accept_local_image_connections(void *ptr); +void *accept_remote_image_connections(void *ptr); + +int64_t forward_image(struct rimage *rimg); +int64_t send_image(int fd, struct rimage *rimg, int flags, bool image_check); +int64_t recv_image(int fd, struct rimage *rimg, uint64_t size, int flags, bool image_check); + +int64_t pb_write_obj(int fd, void *obj, int type); +int64_t pb_read_obj(int fd, void **obj, int type); + +int64_t write_header(int fd, char *snapshot_id, char *path, int open_mode); +int64_t read_header(int fd, char *snapshot_id, char *path, int *open_mode); +int64_t write_reply_header(int fd, int error); +int64_t read_reply_header(int fd, int *error); +int64_t read_remote_header(int fd, char *snapshot_id, char *path, int *open_mode, uint64_t *size); +int64_t write_remote_header(int fd, char *snapshot_id, char *path, int open_mode, uint64_t size); + +int setup_TCP_server_socket(int port); +int setup_TCP_client_socket(char *hostname, int port); +int setup_UNIX_client_socket(char *path); +int setup_UNIX_server_socket(char *path); +#endif diff --git a/criu/shmem.c b/criu/shmem.c index cee47dba7f..6978621fe3 100644 --- a/criu/shmem.c +++ b/criu/shmem.c @@ -288,7 +288,7 @@ static int open_shmem_sysv(int pid, struct vma_area *vma) VmaEntry *vme = vma->e; struct shmem_info *si; struct shmem_sysv_att *att; - uint64_t ret_fd; + int64_t ret_fd; si = shmem_find(vme->shmid); if (!si) { From cb4f2826be0be14567e211e620f893a2c53b6039 Mon Sep 17 00:00:00 2001 From: "rbruno@gsd.inesc-id.pt" Date: Sat, 11 Feb 2017 04:34:45 +0100 Subject: [PATCH 
005/277] zdtm: Add support for image-proxy/image-cache Signed-off-by: Rodrigo Bruno Signed-off-by: Katerina Koukiou Signed-off-by: Pavel Emelyanov --- test/zdtm.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/test/zdtm.py b/test/zdtm.py index 0153c60589..04ed5307d3 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -1011,6 +1011,7 @@ def __init__(self, opts): self.__mdedup = bool(opts['noauto_dedup']) self.__user = bool(opts['user']) self.__leave_stopped = bool(opts['stop']) + self.__remote = bool(opts['remote']) self.__criu = (opts['rpc'] and criu_rpc or criu_cli) self.__show_stats = bool(opts['show_stats']) self.__lazy_pages_p = None @@ -1235,6 +1236,27 @@ def dump(self, action, opts=[]): a_opts += self.__test.getdopts() + if self.__remote: + logdir = os.getcwd() + "/" + self.__dump_path + "/" + str(self.__iter) + print "Adding image cache" + + cache_opts = [self.__criu_bin, "image-cache", "--port", "12345", "-v4", "-o", + logdir + "/image-cache.log", "-D", logdir] + + subprocess.Popen(cache_opts).pid + time.sleep(1) + + print "Adding image proxy" + + proxy_opts = [self.__criu_bin, "image-proxy", "--port", "12345", "--address", + "localhost", "-v4", "-o", logdir + "/image-proxy.log", + "-D", logdir] + + subprocess.Popen(proxy_opts).pid + time.sleep(1) + + a_opts += ["--remote"] + if self.__dedup: a_opts += ["--auto-dedup"] @@ -1287,6 +1309,9 @@ def restore(self): r_opts += ['--empty-ns', 'net'] r_opts += ['--action-script', os.getcwd() + '/empty-netns-prep.sh'] + if self.__remote: + r_opts += ["--remote"] + if self.__dedup: r_opts += ["--auto-dedup"] @@ -1834,7 +1859,7 @@ def run_test(self, name, desc, flavor): 'stop', 'empty_ns', 'fault', 'keep_img', 'report', 'snaps', 'sat', 'script', 'rpc', 'lazy_pages', 'join_ns', 'dedup', 'sbs', 'freezecg', 'user', 'dry_run', 'noauto_dedup', - 'remote_lazy_pages', 'show_stats', 'lazy_migrate', + 'remote_lazy_pages', 'show_stats', 'lazy_migrate', 'remote', 'tls', 'criu_bin', 
'crit_bin') arg = repr((name, desc, flavor, {d: self.__opts[d] for d in nd})) From d7e080e00b37571d4d9719fe2c144fc9d0ccd06a Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Tue, 7 Mar 2017 15:55:10 +0300 Subject: [PATCH 006/277] images: add proto2 syntax specification to remote-image.proto To suppress protobuf's warning: > [libprotobuf WARNING google/protobuf/compiler/parser.cc:546] > No syntax specified for the proto file: remote-image.proto. > Please use 'syntax = "proto2";' or 'syntax = "proto3";' > to specify a syntax version. (Defaulted to proto2 syntax.) Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- images/remote-image.proto | 2 ++ 1 file changed, 2 insertions(+) diff --git a/images/remote-image.proto b/images/remote-image.proto index 1212627e39..f6b81503a0 100644 --- a/images/remote-image.proto +++ b/images/remote-image.proto @@ -1,3 +1,5 @@ +syntax = "proto2"; + message local_image_entry { required string name = 1; required string snapshot_id = 2; From 1a1734de8854bf3522ec378afea5f68ba936a2cc Mon Sep 17 00:00:00 2001 From: Rodrigo Bruno Date: Fri, 10 Mar 2017 09:55:53 +0000 Subject: [PATCH 007/277] Fixed BUFFER_SIZE_WARNING issues introduced by remote images code. 
Signed-off-by: Rodrigo Bruno Signed-off-by: Andrei Vagin --- criu/img-remote-proto.c | 6 ++++-- criu/img-remote.c | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/criu/img-remote-proto.c b/criu/img-remote-proto.c index ad39c42e75..33a07631fe 100644 --- a/criu/img-remote-proto.c +++ b/criu/img-remote-proto.c @@ -432,8 +432,10 @@ static struct rimage *new_remote_image(char *path, char *snapshot_id) return NULL; } - strncpy(rimg->path, path, PATHLEN); - strncpy(rimg->snapshot_id, snapshot_id, PATHLEN); + strncpy(rimg->path, path, PATHLEN -1 ); + rimg->path[PATHLEN - 1] = '\0'; + strncpy(rimg->snapshot_id, snapshot_id, PATHLEN - 1); + rimg->snapshot_id[PATHLEN - 1] = '\0'; rimg->size = 0; buf->nbytes = 0; INIT_LIST_HEAD(&(rimg->buf_head)); diff --git a/criu/img-remote.c b/criu/img-remote.c index 337cb4a7f7..c53217f0ff 100644 --- a/criu/img-remote.c +++ b/criu/img-remote.c @@ -39,7 +39,8 @@ struct snapshot *new_snapshot(char *snapshot_id) pr_perror("Failed to allocate snapshot structure"); return NULL; } - strncpy(s->snapshot_id, snapshot_id, PATHLEN); + strncpy(s->snapshot_id, snapshot_id, PATHLEN - 1); + s->snapshot_id[PATHLEN - 1]= '\0'; return s; } From 7885381f183d88f9a1101335cb8ac7fe2a4c7b8e Mon Sep 17 00:00:00 2001 From: Rodrigo Bruno Date: Sun, 19 Mar 2017 20:44:15 +0000 Subject: [PATCH 008/277] Fixed RESOURCE_LEAK issues introduced by remote images code. 
Signed-off-by: rodrigo-bruno Signed-off-by: Andrei Vagin --- criu/img-remote-proto.c | 59 ++++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 22 deletions(-) diff --git a/criu/img-remote-proto.c b/criu/img-remote-proto.c index 33a07631fe..b3762331ae 100644 --- a/criu/img-remote-proto.c +++ b/criu/img-remote-proto.c @@ -194,20 +194,23 @@ int setup_TCP_server_socket(int port) if (setsockopt( sockfd, SOL_SOCKET, SO_REUSEADDR, &sockopt, sizeof(sockopt)) == -1) { pr_perror("Unable to set SO_REUSEADDR"); - return -1; + goto err; } if (bind(sockfd, (struct sockaddr *) &serv_addr, sizeof(serv_addr)) < 0) { pr_perror("Unable to bind image socket"); - return -1; + goto err; } if (listen(sockfd, DEFAULT_LISTEN)) { pr_perror("Unable to listen image socket"); - return -1; + goto err; } return sockfd; +err: + close(sockfd); + return -1; } int setup_TCP_client_socket(char *hostname, int port) @@ -225,7 +228,7 @@ int setup_TCP_client_socket(char *hostname, int port) server = gethostbyname(hostname); if (server == NULL) { pr_perror("Unable to get host by name (%s)", hostname); - return -1; + goto err; } bzero((char *) &serv_addr, sizeof(serv_addr)); @@ -237,10 +240,13 @@ int setup_TCP_client_socket(char *hostname, int port) if (connect(sockfd, (struct sockaddr *) &serv_addr, sizeof(serv_addr)) < 0) { pr_perror("Unable to connect to remote %s", hostname); - return -1; + goto err; } return sockfd; +err: + close(sockfd); + return -1; } int setup_UNIX_server_socket(char *path) @@ -261,15 +267,18 @@ int setup_UNIX_server_socket(char *path) if (bind(sockfd, (struct sockaddr *)&addr, sizeof(addr)) == -1) { pr_perror("Unable to bind image socket"); - return -1; + goto err; } if (listen(sockfd, 50) == -1) { pr_perror("Unable to listen image socket"); - return -1; + goto err; } return sockfd; +err: + close(sockfd); + return -1; } int setup_UNIX_client_socket(char *path) @@ -388,13 +397,16 @@ static struct wthread *new_worker(void) if (!wt) { pr_perror("Unable to 
allocate worker thread structure"); - return NULL; + goto err; } if (sem_init(&(wt->wakeup_sem), 0, 0) != 0) { pr_perror("Workers semaphore init failed"); - return NULL; + goto err; } return wt; +err: + free(wt); + return NULL; } static void add_worker(struct wthread *wt) @@ -422,14 +434,9 @@ static struct rimage *new_remote_image(char *path, char *snapshot_id) struct rimage *rimg = malloc(sizeof(struct rimage)); struct rbuf *buf = malloc(sizeof(struct rbuf)); - if (rimg == NULL) { - pr_perror("Unable to allocate remote_image structures"); - return NULL; - } - - if (buf == NULL) { - pr_perror("Unable to allocate remote_buffer structures"); - return NULL; + if (rimg == NULL || buf == NULL) { + pr_perror("Unable to allocate remote image structures"); + goto err; } strncpy(rimg->path, path, PATHLEN -1 ); @@ -445,9 +452,13 @@ static struct rimage *new_remote_image(char *path, char *snapshot_id) if (pthread_mutex_init(&(rimg->in_use), NULL) != 0) { pr_perror("Remote image in_use mutex init failed"); - return NULL; + goto err; } return rimg; +err: + free(rimg); + free(buf); + return NULL; } /* Clears a remote image struct for reusing it. 
*/ @@ -604,7 +615,7 @@ void *accept_local_image_connections(void *port) if (read_header(wt->fd, wt->snapshot_id, wt->path, &(wt->flags)) < 0) { pr_perror("Error reading local image header"); - return NULL; + goto err; } pr_info("Received %s request for %s:%s\n", @@ -626,18 +637,18 @@ void *accept_local_image_connections(void *port) int fd = open_proc_rw(PROC_GEN, LAST_PID_PATH); if (fd < 0) { pr_perror("Can't open %s", LAST_PID_PATH); + goto err; } if (flock(fd, LOCK_EX)) { - close(fd); pr_perror("Can't lock %s", LAST_PID_PATH); - return NULL; + goto err; } if (pthread_create( &tid, NULL, process_local_image_connection, (void *) wt)) { pr_perror("Unable to create worker thread"); - return NULL; + goto err; } if (flock(fd, LOCK_UN)) @@ -647,6 +658,10 @@ void *accept_local_image_connections(void *port) wt->tid = tid; add_worker(wt); } +err: + close(cli_fd); + free(wt); + return NULL; } /* Note: size is a limit on how much we want to read from the socket. Zero means From c6c8e4de46b1c8b04a48dda1d23a437fd9d47623 Mon Sep 17 00:00:00 2001 From: Rodrigo Bruno Date: Sat, 18 Mar 2017 17:51:37 +0000 Subject: [PATCH 009/277] Fixed NULL_RETURNS issues introduced by remote images code. Signed-off-by: rodrigo-bruno Signed-off-by: Andrei Vagin --- criu/image.c | 15 +++++++++++---- criu/util.c | 10 ++++++++-- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/criu/image.c b/criu/image.c index 6a10d60e8d..8863952126 100644 --- a/criu/image.c +++ b/criu/image.c @@ -394,14 +394,20 @@ static int img_write_magic(struct cr_img *img, int oflags, int type) int do_open_remote_image(int dfd, char *path, int flags) { char *snapshot_id = NULL; - int ret; + int ret, save; /* When using namespaces, the current dir is changed so we need to * change to previous working dir and back to correctly open the image * proxy and cache sockets. 
*/ - int save = dirfd(opendir(".")); + save = open(".", O_RDONLY); + if (save < 0) { + pr_perror("unable to open current working directory"); + return -1; + } + if (fchdir(get_service_fd(IMG_FD_OFF)) < 0) { - pr_debug("fchdir to dfd failed!\n"); + pr_perror("fchdir to dfd failed!\n"); + close(save); return -1; } @@ -420,7 +426,8 @@ int do_open_remote_image(int dfd, char *path, int flags) } if (fchdir(save) < 0) { - pr_debug("fchdir to save failed!\n"); + pr_perror("fchdir to save failed"); + close(save); return -1; } close(save); diff --git a/criu/util.c b/criu/util.c index 1933b29ac5..256fa15941 100644 --- a/criu/util.c +++ b/criu/util.c @@ -421,9 +421,15 @@ int copy_file(int fd_in, int fd_out, size_t bytes) { ssize_t written = 0; size_t chunk = bytes ? bytes : 4096; - char *buffer = (char*) malloc(chunk); + char *buffer; ssize_t ret; + buffer = xmalloc(chunk); + if (buffer == NULL) { + pr_perror("failed to allocate buffer to copy file"); + return -1; + } + while (1) { if (opts.remote) { ret = read(fd_in, buffer, chunk); @@ -459,7 +465,7 @@ int copy_file(int fd_in, int fd_out, size_t bytes) written += ret; } err: - free(buffer); + xfree(buffer); return ret; } From 7530b30145e0087277e4ab9f7296d2293d379363 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Thu, 23 Mar 2017 15:01:58 -0700 Subject: [PATCH 010/277] criu/img-remote-proto.c: use static mutex init I see no need to do dynamic init here. 
Cc: Rodrigo Bruno Signed-off-by: Kir Kolyshkin Signed-off-by: Andrei Vagin --- criu/img-remote-proto.c | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/criu/img-remote-proto.c b/criu/img-remote-proto.c index b3762331ae..3199b7eb51 100644 --- a/criu/img-remote-proto.c +++ b/criu/img-remote-proto.c @@ -19,12 +19,12 @@ #include "image.h" LIST_HEAD(rimg_head); -pthread_mutex_t rimg_lock; +pthread_mutex_t rimg_lock = PTHREAD_MUTEX_INITIALIZER; -pthread_mutex_t proxy_to_cache_lock; +pthread_mutex_t proxy_to_cache_lock = PTHREAD_MUTEX_INITIALIZER; LIST_HEAD(workers_head); -pthread_mutex_t workers_lock; +pthread_mutex_t workers_lock = PTHREAD_MUTEX_INITIALIZER; sem_t workers_semph; struct rimage * (*wait_for_image) (struct wthread *wt); @@ -69,21 +69,6 @@ static struct wthread *get_wt_by_name(const char *snapshot_id, const char *path) static int init_sync_structures(void) { - if (pthread_mutex_init(&rimg_lock, NULL) != 0) { - pr_perror("Remote image list mutex init failed"); - return -1; - } - - if (pthread_mutex_init(&proxy_to_cache_lock, NULL) != 0) { - pr_perror("Remote image connection mutex init failed"); - return -1; - } - - if (pthread_mutex_init(&workers_lock, NULL) != 0) { - pr_perror("Workers mutex init failed"); - return -1; - } - if (sem_init(&workers_semph, 0, 0) != 0) { pr_perror("Workers semaphore init failed"); return -1; From cc8f97036543d499d5b0d2305bd32ed1f7d96717 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Thu, 23 Mar 2017 15:01:59 -0700 Subject: [PATCH 011/277] criu/img-remote-proto.c: error printing fixes OK, so we have pr_perror() for cases where errno is set (and it makes sense to show it), and pr_err() for other errors. A correct function is to be used, depending on the context. 1. pthread_mutex_*() functions don't set errno, therefore pr_perror() should not be used. 2. accept() sets errno => makes sense to use pr_perror(). 3. read_header() arguably sets errno => use pr_err(). 4. 
open_proc_rw() already prints an error message, there is no need for yet another one. Cc: Rodrigo Bruno Signed-off-by: Kir Kolyshkin Signed-off-by: Andrei Vagin --- criu/img-remote-proto.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/criu/img-remote-proto.c b/criu/img-remote-proto.c index 3199b7eb51..4f440976ac 100644 --- a/criu/img-remote-proto.c +++ b/criu/img-remote-proto.c @@ -436,7 +436,7 @@ static struct rimage *new_remote_image(char *path, char *snapshot_id) rimg->curr_sent_bytes = 0; if (pthread_mutex_init(&(rimg->in_use), NULL) != 0) { - pr_perror("Remote image in_use mutex init failed"); + pr_err("Remote image in_use mutex init failed\n"); goto err; } return rimg; @@ -590,7 +590,7 @@ void *accept_local_image_connections(void *port) cli_fd = accept(fd, (struct sockaddr *) &cli_addr, &clilen); if (cli_fd < 0) { if (!finished) - pr_err("Unable to accept local image connection"); + pr_perror("Unable to accept local image connection"); close(cli_fd); return NULL; } @@ -599,7 +599,7 @@ void *accept_local_image_connections(void *port) wt->fd = cli_fd; if (read_header(wt->fd, wt->snapshot_id, wt->path, &(wt->flags)) < 0) { - pr_perror("Error reading local image header"); + pr_err("Error reading local image header\n"); goto err; } @@ -620,10 +620,8 @@ void *accept_local_image_connections(void *port) * from restore. */ int fd = open_proc_rw(PROC_GEN, LAST_PID_PATH); - if (fd < 0) { - pr_perror("Can't open %s", LAST_PID_PATH); + if (fd < 0) goto err; - } if (flock(fd, LOCK_EX)) { pr_perror("Can't lock %s", LAST_PID_PATH); From 7e3655f3c4d03fba72f9648672a7e3dd3061282a Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Thu, 23 Mar 2017 15:02:07 -0700 Subject: [PATCH 012/277] criu/img-remote.c: use xmalloc 1. Use xmalloc() where possible. 2. There is no need to print an error message, as xmalloc() has already printed it for you. 
Cc: Rodrigo Bruno Signed-off-by: Kir Kolyshkin Signed-off-by: Andrei Vagin --- criu/img-remote.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/criu/img-remote.c b/criu/img-remote.c index c53217f0ff..1e37bf33d5 100644 --- a/criu/img-remote.c +++ b/criu/img-remote.c @@ -33,12 +33,11 @@ struct snapshot { struct snapshot *new_snapshot(char *snapshot_id) { - struct snapshot *s = malloc(sizeof(struct snapshot)); + struct snapshot *s = xmalloc(sizeof(struct snapshot)); - if (!s) { - pr_perror("Failed to allocate snapshot structure"); + if (!s) return NULL; - } + strncpy(s->snapshot_id, snapshot_id, PATHLEN - 1); s->snapshot_id[PATHLEN - 1]= '\0'; return s; @@ -180,7 +179,6 @@ static int pull_snapshot_ids(void) s = new_snapshot(ls->snapshot_id); if (!s) { - pr_perror("Unable create new snapshot structure"); close(sockfd); return -1; } @@ -206,7 +204,6 @@ int push_snapshot_id(void) rn.snapshot_id = xmalloc(sizeof(char) * PATHLEN); if (!rn.snapshot_id) { - pr_perror("Unable to allocate snapshot id buffer"); close(sockfd); return -1; } From eb57e8f0cd7ed81756cd672b4997b42591b4a89c Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Thu, 23 Mar 2017 15:02:08 -0700 Subject: [PATCH 013/277] criu/img-remote.c: use pr_err not pr_perror In those error paths where we don't have errno set, don't use pr_perror(), use pr_err() instead. Cc: Rodrigo Bruno Signed-off-by: Kir Kolyshkin Signed-off-by: Andrei Vagin --- criu/img-remote.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/criu/img-remote.c b/criu/img-remote.c index 1e37bf33d5..2bf62d2a9a 100644 --- a/criu/img-remote.c +++ b/criu/img-remote.c @@ -54,17 +54,19 @@ int read_remote_image_connection(char *snapshot_id, char *path) int sockfd = setup_UNIX_client_socket(restoring ? 
DEFAULT_CACHE_SOCKET: DEFAULT_PROXY_SOCKET); if (sockfd < 0) { - pr_perror("Error opening local connection for %s:%s", path, snapshot_id); + pr_err("Error opening local connection for %s:%s\n", + path, snapshot_id); return -1; } if (write_header(sockfd, snapshot_id, path, O_RDONLY) < 0) { - pr_perror("Error writing header for %s:%s", path, snapshot_id); + pr_err("Error writing header for %s:%s\n", path, snapshot_id); return -1; } if (read_reply_header(sockfd, &error) < 0) { - pr_perror("Error reading reply header for %s:%s", path, snapshot_id); + pr_err("Error reading reply header for %s:%s\n", + path, snapshot_id); return -1; } if (!error || !strncmp(path, RESTORE_FINISH, sizeof(RESTORE_FINISH))) @@ -74,7 +76,8 @@ int read_remote_image_connection(char *snapshot_id, char *path) close(sockfd); return -ENOENT; } - pr_perror("Unexpected error returned: %d (%s:%s)\n", error, path, snapshot_id); + pr_err("Unexpected error returned: %d (%s:%s)\n", + error, path, snapshot_id); close(sockfd); return -1; } @@ -87,7 +90,7 @@ int write_remote_image_connection(char *snapshot_id, char *path, int flags) return -1; if (write_header(sockfd, snapshot_id, path, flags) < 0) { - pr_perror("Error writing header for %s:%s", path, snapshot_id); + pr_err("Error writing header for %s:%s\n", path, snapshot_id); return -1; } return sockfd; @@ -99,7 +102,7 @@ int finish_remote_dump(void) int fd = write_remote_image_connection(NULL_SNAPSHOT_ID, DUMP_FINISH, O_WRONLY); if (fd == -1) { - pr_perror("Unable to open finish dump connection"); + pr_err("Unable to open finish dump connection"); return -1; } @@ -113,7 +116,7 @@ int finish_remote_restore(void) int fd = read_remote_image_connection(NULL_SNAPSHOT_ID, RESTORE_FINISH); if (fd == -1) { - pr_perror("Unable to open finish restore connection"); + pr_err("Unable to open finish restore connection\n"); return -1; } @@ -143,7 +146,7 @@ int skip_remote_bytes(int fd, unsigned long len) } if (curr != len) { - pr_perror("Unable to skip the current 
number of bytes: %lx instead of %lx", + pr_err("Unable to skip the current number of bytes: %lx instead of %lx\n", curr, len); return -1; } @@ -162,7 +165,7 @@ static int pull_snapshot_ids(void) if (sockfd < 0 && errno == ENOENT) return 0; else if (sockfd < 0) { - pr_perror("Unable to open snapshot id read connection"); + pr_err("Unable to open snapshot id read connection\n"); return -1; } @@ -172,7 +175,7 @@ static int pull_snapshot_ids(void) close(sockfd); return n; } else if (n < 0) { - pr_perror("Unable to read remote snapshot ids"); + pr_err("Unable to read remote snapshot ids\n"); close(sockfd); return n; } @@ -198,7 +201,7 @@ int push_snapshot_id(void) int sockfd = write_remote_image_connection(NULL_SNAPSHOT_ID, PARENT_IMG, O_APPEND); if (sockfd < 0) { - pr_perror("Unable to open snapshot id push connection"); + pr_err("Unable to open snapshot id push connection\n"); return -1; } @@ -240,7 +243,8 @@ int get_curr_snapshot_id_idx(void) idx++; } - pr_perror("Error, could not find current snapshot id (%s) fd", snapshot_id); + pr_err("Error, could not find current snapshot id (%s) fd\n", + snapshot_id); return -1; } @@ -263,7 +267,7 @@ char *get_snapshot_id_from_idx(int idx) idx--; } - pr_perror("Error, could not find snapshot id for idx %d", idx); + pr_err("Error, could not find snapshot id for idx %d\n", idx); return NULL; } From f118eb9527c60613ef30cb0267f3d4b6721c77d6 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 25 Apr 2017 13:38:01 +0300 Subject: [PATCH 014/277] pagemap: fix reading pages from socket for --remote case When --remote option is specified, read_local_page tries to pread from a socket, and fails with "Illegal seek" error. Restore single pread call for regular image files case and introduce maybe_read_page_img_cache version of maybe_read_page method. 
Generally-approved-by: Rodrigo Bruno Acked-by: Pavel Emelyanov Signed-off-by: Mike Rapoport Signed-off-by: Andrei Vagin --- criu/pagemap.c | 52 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 11 deletions(-) diff --git a/criu/pagemap.c b/criu/pagemap.c index 8ef5bceda3..056a4dc660 100644 --- a/criu/pagemap.c +++ b/criu/pagemap.c @@ -249,7 +249,6 @@ static int read_local_page(struct page_read *pr, unsigned long vaddr, { int fd = img_raw_fd(pr->pi); ssize_t ret; - size_t curr = 0; /* * Flush any pending async requests if any not to break the @@ -259,15 +258,10 @@ static int read_local_page(struct page_read *pr, unsigned long vaddr, return -1; pr_debug("\tpr%lu-%u Read page from self %lx/%"PRIx64"\n", pr->img_id, pr->id, pr->cvaddr, pr->pi_off); - while (1) { - ret = pread(fd, buf + curr, len - curr, pr->pi_off + curr); - if (ret < 1) { - pr_perror("Can't read mapping page %zd", ret); - return -1; - } - curr += ret; - if (curr == len) - break; + ret = pread(fd, buf, len, pr->pi_off); + if (ret != len) { + pr_perror("Can't read mapping page %zd", ret); + return -1; } if (opts.auto_dedup) { @@ -411,6 +405,40 @@ static int maybe_read_page_local(struct page_read *pr, unsigned long vaddr, return ret; } +static int maybe_read_page_img_cache(struct page_read *pr, unsigned long vaddr, + int nr, void *buf, unsigned flags) +{ + unsigned long len = nr * PAGE_SIZE; + int fd = img_raw_fd(pr->pi); + int ret; + size_t curr = 0; + + pr_debug("\tpr%lu-%u Read page from self %lx/%"PRIx64"\n", pr->img_id, pr->id, pr->cvaddr, pr->pi_off); + while (1) { + ret = read(fd, buf + curr, len - curr); + if (ret < 0) { + pr_perror("Can't read mapping page %d", ret); + return -1; + } + curr += ret; + if (curr == len) + break; + } + + if (opts.auto_dedup) { + ret = punch_hole(pr, pr->pi_off, len, false); + if (ret == -1) + return -1; + } + + if (ret == 0 && pr->io_complete) + ret = pr->io_complete(pr, vaddr, nr); + + pr->pi_off += len; + + return ret; +} + 
static int read_page_complete(unsigned long img_id, unsigned long vaddr, int nr_pages, void *priv) { int ret = 0; @@ -812,7 +840,9 @@ int open_page_read_at(int dfd, unsigned long img_id, struct page_read *pr, int p pr->id = ids++; pr->img_id = img_id; - if (remote) + if (opts.remote) + pr->maybe_read_page = maybe_read_page_img_cache; + else if (remote) pr->maybe_read_page = maybe_read_page_remote; else { pr->maybe_read_page = maybe_read_page_local; From d2fa4adce72b897eef7edd7d3422aeb409b6e7bb Mon Sep 17 00:00:00 2001 From: Omri Kramer Date: Sat, 1 Jul 2017 18:42:21 +0300 Subject: [PATCH 015/277] Merge img-remote and img-remote-proto There is no real need to have both. Signed-off-by: Omri Kramer Singed-off-by: Lior Fisch Reviewed-by: Mike Rapoport Signed-off-by: Andrei Vagin --- criu/Makefile.crtools | 1 - criu/img-cache.c | 2 +- criu/img-proxy.c | 2 +- criu/img-remote-proto.c | 742 -------------------------------- criu/img-remote.c | 719 ++++++++++++++++++++++++++++++- criu/include/img-remote-proto.h | 88 ---- criu/include/img-remote.h | 75 ++++ 7 files changed, 794 insertions(+), 835 deletions(-) delete mode 100644 criu/img-remote-proto.c delete mode 100644 criu/include/img-remote-proto.h diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools index 1756cac6ce..d19ff8123b 100644 --- a/criu/Makefile.crtools +++ b/criu/Makefile.crtools @@ -32,7 +32,6 @@ obj-y += image.o obj-y += img-remote.o obj-y += img-proxy.o obj-y += img-cache.o -obj-y += img-remote-proto.o obj-y += ipc_ns.o obj-y += irmap.o obj-y += kcmp-ids.o diff --git a/criu/img-cache.c b/criu/img-cache.c index 293597088d..7020a30f02 100644 --- a/criu/img-cache.c +++ b/criu/img-cache.c @@ -1,6 +1,6 @@ #include -#include "img-remote-proto.h" +#include "img-remote.h" #include "criu-log.h" #include #include diff --git a/criu/img-proxy.c b/criu/img-proxy.c index 58123dccb7..f56073b4e4 100644 --- a/criu/img-proxy.c +++ b/criu/img-proxy.c @@ -1,7 +1,7 @@ #include #include "img-remote.h" -#include 
"img-remote-proto.h" +#include "img-remote.h" #include "criu-log.h" #include #include diff --git a/criu/img-remote-proto.c b/criu/img-remote-proto.c deleted file mode 100644 index 4f440976ac..0000000000 --- a/criu/img-remote-proto.c +++ /dev/null @@ -1,742 +0,0 @@ -#include -#include - -#include -#include -#include -#include -#include "sys/un.h" -#include -#include -#include - -#include "img-remote-proto.h" -#include "criu-log.h" -#include "common/compiler.h" - -#include "protobuf.h" -#include "images/remote-image.pb-c.h" -#include "image.h" - -LIST_HEAD(rimg_head); -pthread_mutex_t rimg_lock = PTHREAD_MUTEX_INITIALIZER; - -pthread_mutex_t proxy_to_cache_lock = PTHREAD_MUTEX_INITIALIZER; - -LIST_HEAD(workers_head); -pthread_mutex_t workers_lock = PTHREAD_MUTEX_INITIALIZER; -sem_t workers_semph; - -struct rimage * (*wait_for_image) (struct wthread *wt); - -bool finished = false; -int writing = 0; -int forwarding = 0; -int proxy_to_cache_fd; -int local_req_fd; - -struct rimage *get_rimg_by_name(const char *snapshot_id, const char *path) -{ - struct rimage *rimg = NULL; - - pthread_mutex_lock(&rimg_lock); - list_for_each_entry(rimg, &rimg_head, l) { - if (!strncmp(rimg->path, path, PATHLEN) && - !strncmp(rimg->snapshot_id, snapshot_id, PATHLEN)) { - pthread_mutex_unlock(&rimg_lock); - return rimg; - } - } - pthread_mutex_unlock(&rimg_lock); - return NULL; -} - -static struct wthread *get_wt_by_name(const char *snapshot_id, const char *path) -{ - struct wthread *wt = NULL; - - pthread_mutex_lock(&workers_lock); - list_for_each_entry(wt, &workers_head, l) { - if (!strncmp(wt->path, path, PATHLEN) && - !strncmp(wt->snapshot_id, snapshot_id, PATHLEN)) { - pthread_mutex_unlock(&workers_lock); - return wt; - } - } - pthread_mutex_unlock(&workers_lock); - return NULL; -} - -static int init_sync_structures(void) -{ - if (sem_init(&workers_semph, 0, 0) != 0) { - pr_perror("Workers semaphore init failed"); - return -1; - } - - return 0; -} - -void prepare_recv_rimg(void) -{ - 
pthread_mutex_lock(&rimg_lock); - writing++; - pthread_mutex_unlock(&rimg_lock); -} - -void finalize_recv_rimg(struct rimage *rimg) -{ - - pthread_mutex_lock(&rimg_lock); - - if (rimg) - list_add_tail(&(rimg->l), &rimg_head); - writing--; - pthread_mutex_unlock(&rimg_lock); - /* Wake thread waiting for this image. */ - if (rimg) { - struct wthread *wt = get_wt_by_name(rimg->snapshot_id, rimg->path); - if (wt) - sem_post(&(wt->wakeup_sem)); - } -} - -bool is_receiving(void) -{ - int ret; - - pthread_mutex_lock(&rimg_lock); - ret = writing; - pthread_mutex_unlock(&rimg_lock); - return ret > 0; -} - -static void prepare_fwd_rimg(void) -{ - pthread_mutex_lock(&rimg_lock); - forwarding++; - pthread_mutex_unlock(&rimg_lock); -} - -static void finalize_fwd_rimg(void) -{ - pthread_mutex_lock(&rimg_lock); - forwarding--; - pthread_mutex_unlock(&rimg_lock); -} - -static bool is_forwarding(void) -{ - int ret; - - pthread_mutex_lock(&rimg_lock); - ret = forwarding; - pthread_mutex_unlock(&rimg_lock); - return ret > 0; -} - -/* This function is called when no more images are coming. Threads still waiting - * for images will be awaken to send a ENOENT (no such file) to the requester. 
- */ -void unlock_workers(void) -{ - struct wthread *wt = NULL; - - pthread_mutex_lock(&workers_lock); - list_for_each_entry(wt, &workers_head, l) - sem_post(&(wt->wakeup_sem)); - pthread_mutex_unlock(&workers_lock); -} - -int init_daemon(bool background, struct rimage *(*wfi)(struct wthread*)) -{ - if (background) { - if (daemon(1, 0) == -1) { - pr_perror("Can't run service server in the background"); - return -1; - } - } - wait_for_image = wfi; - return init_sync_structures(); -} - -int setup_TCP_server_socket(int port) -{ - struct sockaddr_in serv_addr; - int sockopt = 1; - int sockfd = socket(AF_INET, SOCK_STREAM, 0); - - if (sockfd < 0) { - pr_perror("Unable to open image socket"); - return -1; - } - - bzero((char *) &serv_addr, sizeof(serv_addr)); - serv_addr.sin_family = AF_INET; - serv_addr.sin_addr.s_addr = INADDR_ANY; - serv_addr.sin_port = htons(port); - - if (setsockopt( - sockfd, SOL_SOCKET, SO_REUSEADDR, &sockopt, sizeof(sockopt)) == -1) { - pr_perror("Unable to set SO_REUSEADDR"); - goto err; - } - - if (bind(sockfd, (struct sockaddr *) &serv_addr, sizeof(serv_addr)) < 0) { - pr_perror("Unable to bind image socket"); - goto err; - } - - if (listen(sockfd, DEFAULT_LISTEN)) { - pr_perror("Unable to listen image socket"); - goto err; - } - - return sockfd; -err: - close(sockfd); - return -1; -} - -int setup_TCP_client_socket(char *hostname, int port) -{ - int sockfd; - struct sockaddr_in serv_addr; - struct hostent *server; - - sockfd = socket(AF_INET, SOCK_STREAM, 0); - if (sockfd < 0) { - pr_perror("Unable to open remote image socket"); - return -1; - } - - server = gethostbyname(hostname); - if (server == NULL) { - pr_perror("Unable to get host by name (%s)", hostname); - goto err; - } - - bzero((char *) &serv_addr, sizeof(serv_addr)); - serv_addr.sin_family = AF_INET; - bcopy((char *) server->h_addr, - (char *) &serv_addr.sin_addr.s_addr, - server->h_length); - serv_addr.sin_port = htons(port); - - if (connect(sockfd, (struct sockaddr *) &serv_addr, 
sizeof(serv_addr)) < 0) { - pr_perror("Unable to connect to remote %s", hostname); - goto err; - } - - return sockfd; -err: - close(sockfd); - return -1; -} - -int setup_UNIX_server_socket(char *path) -{ - struct sockaddr_un addr; - int sockfd = socket(AF_UNIX, SOCK_STREAM, 0); - - if (sockfd < 0) { - pr_perror("Unable to open image socket"); - return -1; - } - - memset(&addr, 0, sizeof(addr)); - addr.sun_family = AF_UNIX; - strncpy(addr.sun_path, path, sizeof(addr.sun_path)-1); - - unlink(path); - - if (bind(sockfd, (struct sockaddr *)&addr, sizeof(addr)) == -1) { - pr_perror("Unable to bind image socket"); - goto err; - } - - if (listen(sockfd, 50) == -1) { - pr_perror("Unable to listen image socket"); - goto err; - } - - return sockfd; -err: - close(sockfd); - return -1; -} - -int setup_UNIX_client_socket(char *path) -{ - struct sockaddr_un addr; - int sockfd = socket(AF_UNIX, SOCK_STREAM, 0); - - if (sockfd < 0) { - pr_perror("Unable to open local image socket"); - return -1; - } - - memset(&addr, 0, sizeof(addr)); - addr.sun_family = AF_UNIX; - strncpy(addr.sun_path, path, sizeof(addr.sun_path)-1); - - if (connect(sockfd, (struct sockaddr *)&addr, sizeof(addr)) < 0) { - pr_perror("Unable to connect to local socket: %s", path); - close(sockfd); - return -1; - } - - return sockfd; -} - -int64_t pb_write_obj(int fd, void *obj, int type) -{ - struct cr_img img; - - img._x.fd = fd; - bfd_setraw(&img._x); - return pb_write_one(&img, obj, type); -} - -int64_t pb_read_obj(int fd, void **pobj, int type) -{ - struct cr_img img; - - img._x.fd = fd; - bfd_setraw(&img._x); - return do_pb_read_one(&img, pobj, type, true); -} - -int64_t write_header(int fd, char *snapshot_id, char *path, int flags) -{ - LocalImageEntry li = LOCAL_IMAGE_ENTRY__INIT; - - li.name = path; - li.snapshot_id = snapshot_id; - li.open_mode = flags; - return pb_write_obj(fd, &li, PB_LOCAL_IMAGE); -} - -int64_t write_reply_header(int fd, int error) -{ - LocalImageReplyEntry lir = 
LOCAL_IMAGE_REPLY_ENTRY__INIT; - - lir.error = error; - return pb_write_obj(fd, &lir, PB_LOCAL_IMAGE_REPLY); -} - -int64_t write_remote_header(int fd, char *snapshot_id, char *path, int flags, uint64_t size) -{ - RemoteImageEntry ri = REMOTE_IMAGE_ENTRY__INIT; - - ri.name = path; - ri.snapshot_id = snapshot_id; - ri.open_mode = flags; - ri.size = size; - return pb_write_obj(fd, &ri, PB_REMOTE_IMAGE); -} - -int64_t read_header(int fd, char *snapshot_id, char *path, int *flags) -{ - LocalImageEntry *li; - int ret = pb_read_obj(fd, (void **)&li, PB_LOCAL_IMAGE); - - if (ret > 0) { - strncpy(snapshot_id, li->snapshot_id, PATHLEN); - strncpy(path, li->name, PATHLEN); - *flags = li->open_mode; - } - free(li); - return ret; -} - -int64_t read_reply_header(int fd, int *error) -{ - LocalImageReplyEntry *lir; - int ret = pb_read_obj(fd, (void **)&lir, PB_LOCAL_IMAGE_REPLY); - - if (ret > 0) - *error = lir->error; - free(lir); - return ret; -} - -int64_t read_remote_header(int fd, char *snapshot_id, char *path, int *flags, uint64_t *size) -{ - RemoteImageEntry *ri; - int ret = pb_read_obj(fd, (void **)&ri, PB_REMOTE_IMAGE); - - if (ret > 0) { - strncpy(snapshot_id, ri->snapshot_id, PATHLEN); - strncpy(path, ri->name, PATHLEN); - *flags = ri->open_mode; - *size = ri->size; - } - free(ri); - return ret; -} - -static struct wthread *new_worker(void) -{ - struct wthread *wt = malloc(sizeof(struct wthread)); - - if (!wt) { - pr_perror("Unable to allocate worker thread structure"); - goto err; - } - if (sem_init(&(wt->wakeup_sem), 0, 0) != 0) { - pr_perror("Workers semaphore init failed"); - goto err; - } - return wt; -err: - free(wt); - return NULL; -} - -static void add_worker(struct wthread *wt) -{ - pthread_mutex_lock(&workers_lock); - list_add_tail(&(wt->l), &workers_head); - pthread_mutex_unlock(&workers_lock); - sem_post(&workers_semph); -} - -void join_workers(void) -{ - struct wthread *wthread = NULL; - - while (! 
list_empty(&workers_head)) { - wthread = list_entry(workers_head.next, struct wthread, l); - pthread_join(wthread->tid, NULL); - list_del(&(wthread->l)); - free(wthread); - } -} - -static struct rimage *new_remote_image(char *path, char *snapshot_id) -{ - struct rimage *rimg = malloc(sizeof(struct rimage)); - struct rbuf *buf = malloc(sizeof(struct rbuf)); - - if (rimg == NULL || buf == NULL) { - pr_perror("Unable to allocate remote image structures"); - goto err; - } - - strncpy(rimg->path, path, PATHLEN -1 ); - rimg->path[PATHLEN - 1] = '\0'; - strncpy(rimg->snapshot_id, snapshot_id, PATHLEN - 1); - rimg->snapshot_id[PATHLEN - 1] = '\0'; - rimg->size = 0; - buf->nbytes = 0; - INIT_LIST_HEAD(&(rimg->buf_head)); - list_add_tail(&(buf->l), &(rimg->buf_head)); - rimg->curr_sent_buf = list_entry(rimg->buf_head.next, struct rbuf, l); - rimg->curr_sent_bytes = 0; - - if (pthread_mutex_init(&(rimg->in_use), NULL) != 0) { - pr_err("Remote image in_use mutex init failed\n"); - goto err; - } - return rimg; -err: - free(rimg); - free(buf); - return NULL; -} - -/* Clears a remote image struct for reusing it. */ -static struct rimage *clear_remote_image(struct rimage *rimg) -{ - pthread_mutex_lock(&(rimg->in_use)); - - while (!list_is_singular(&(rimg->buf_head))) { - struct rbuf *buf = list_entry(rimg->buf_head.prev, struct rbuf, l); - - list_del(rimg->buf_head.prev); - free(buf); - } - - list_entry(rimg->buf_head.next, struct rbuf, l)->nbytes = 0; - rimg->size = 0; - rimg->curr_sent_buf = list_entry(rimg->buf_head.next, struct rbuf, l); - rimg->curr_sent_bytes = 0; - - pthread_mutex_unlock(&(rimg->in_use)); - - return rimg; -} - -struct rimage *prepare_remote_image(char *path, char *snapshot_id, int open_mode) -{ - struct rimage *rimg = get_rimg_by_name(snapshot_id, path); - /* There is no record of such image, create a new one. 
*/ - - if (rimg == NULL) - return new_remote_image(path, snapshot_id); - - pthread_mutex_lock(&rimg_lock); - list_del(&(rimg->l)); - pthread_mutex_unlock(&rimg_lock); - - /* There is already an image record. Simply return it for appending. */ - if (open_mode == O_APPEND) - return rimg; - /* There is already an image record. Clear it for writing. */ - else - return clear_remote_image(rimg); -} - -void *process_local_read(struct wthread *wt) -{ - struct rimage *rimg = NULL; - int64_t ret; - /* TODO - split wait_for_image - * in cache - improve the parent stuf - * in proxy - do not wait for anything, return no file - */ - rimg = wait_for_image(wt); - if (!rimg) { - pr_info("No image %s:%s.\n", wt->path, wt->snapshot_id); - if (write_reply_header(wt->fd, ENOENT) < 0) - pr_perror("Error writing reply header for unexisting image"); - close(wt->fd); - return NULL; - } else { - if (write_reply_header(wt->fd, 0) < 0) { - pr_perror("Error writing reply header for %s:%s", - wt->path, wt->snapshot_id); - close(wt->fd); - return NULL; - } - } - - pthread_mutex_lock(&(rimg->in_use)); - ret = send_image(wt->fd, rimg, wt->flags, true); - if (ret < 0) - pr_perror("Unable to send %s:%s to CRIU (sent %ld bytes)", - rimg->path, rimg->snapshot_id, (long)ret); - else - pr_info("Finished sending %s:%s to CRIU (sent %ld bytes)\n", - rimg->path, rimg->snapshot_id, (long)ret); - pthread_mutex_unlock(&(rimg->in_use)); - return NULL; -} - -static void *process_local_image_connection(void *ptr) -{ - struct wthread *wt = (struct wthread *) ptr; - struct rimage *rimg = NULL; - int64_t ret; - - /* NOTE: the code inside this if is shared for both cache and proxy. */ - if (wt->flags == O_RDONLY) - return process_local_read(wt); - - /* NOTE: IMAGE PROXY ONLY. The image cache receives write connections - * through TCP (see accept_remote_image_connections). 
- */ - rimg = prepare_remote_image(wt->path, wt->snapshot_id, wt->flags); - ret = recv_image(wt->fd, rimg, 0, wt->flags, true); - if (ret < 0) { - pr_perror("Unable to receive %s:%s to CRIU (received %ld bytes)", - rimg->path, rimg->snapshot_id, (long)ret); - finalize_recv_rimg(NULL); - return NULL; - } - finalize_recv_rimg(rimg); - pr_info("Finished receiving %s:%s (received %ld bytes)\n", - rimg->path, rimg->snapshot_id, (long)ret); - - - if (!strncmp(rimg->path, DUMP_FINISH, sizeof(DUMP_FINISH))) { - finished = true; - shutdown(local_req_fd, SHUT_RD); - } else { - pthread_mutex_lock(&proxy_to_cache_lock); - ret = forward_image(rimg); - pthread_mutex_unlock(&proxy_to_cache_lock); - } - - finalize_fwd_rimg(); - if (ret < 0) { - pr_perror("Unable to forward %s:%s to Image Cache", - rimg->path, rimg->snapshot_id); - - return NULL; - } - - if (finished && !is_forwarding() && !is_receiving()) { - pr_info("Closing connection to Image Cache.\n"); - close(proxy_to_cache_fd); - unlock_workers(); - } - return NULL; -} - - -void *accept_local_image_connections(void *port) -{ - int fd = *((int *) port); - int cli_fd; - struct sockaddr_in cli_addr; - - socklen_t clilen = sizeof(cli_addr); - pthread_t tid; - struct wthread *wt; - - while (1) { - cli_fd = accept(fd, (struct sockaddr *) &cli_addr, &clilen); - if (cli_fd < 0) { - if (!finished) - pr_perror("Unable to accept local image connection"); - close(cli_fd); - return NULL; - } - - wt = new_worker(); - wt->fd = cli_fd; - - if (read_header(wt->fd, wt->snapshot_id, wt->path, &(wt->flags)) < 0) { - pr_err("Error reading local image header\n"); - goto err; - } - - pr_info("Received %s request for %s:%s\n", - wt->flags == O_RDONLY ? "read" : - wt->flags == O_APPEND ? "append" : "write", - wt->path, wt->snapshot_id); - - /* These function calls are used to avoid other threads from - * thinking that there are no more images are coming. 
- */ - if (wt->flags != O_RDONLY) { - prepare_recv_rimg(); - prepare_fwd_rimg(); - } - - /* We need to flock the last pid file to avoid stealing pids - * from restore. - */ - int fd = open_proc_rw(PROC_GEN, LAST_PID_PATH); - if (fd < 0) - goto err; - - if (flock(fd, LOCK_EX)) { - pr_perror("Can't lock %s", LAST_PID_PATH); - goto err; - } - - if (pthread_create( - &tid, NULL, process_local_image_connection, (void *) wt)) { - pr_perror("Unable to create worker thread"); - goto err; - } - - if (flock(fd, LOCK_UN)) - pr_perror("Can't unlock %s", LAST_PID_PATH); - close(fd); - - wt->tid = tid; - add_worker(wt); - } -err: - close(cli_fd); - free(wt); - return NULL; -} - -/* Note: size is a limit on how much we want to read from the socket. Zero means - * read until the socket is closed. - */ -int64_t recv_image(int fd, struct rimage *rimg, uint64_t size, int flags, bool close_fd) -{ - struct rbuf *curr_buf = NULL; - int n; - - if (flags == O_APPEND) - curr_buf = list_entry(rimg->buf_head.prev, struct rbuf, l); - else - curr_buf = list_entry(rimg->buf_head.next, struct rbuf, l); - - while (1) { - n = read(fd, - curr_buf->buffer + curr_buf->nbytes, - size ? 
- min((int) (size - rimg->size), BUF_SIZE - curr_buf->nbytes) : - BUF_SIZE - curr_buf->nbytes); - if (n == 0) { - if (close_fd) - close(fd); - return rimg->size; - } else if (n > 0) { - curr_buf->nbytes += n; - rimg->size += n; - if (curr_buf->nbytes == BUF_SIZE) { - struct rbuf *buf = malloc(sizeof(struct rbuf)); - if (buf == NULL) { - pr_perror("Unable to allocate remote_buffer structures"); - if (close_fd) - close(fd); - return -1; - } - buf->nbytes = 0; - list_add_tail(&(buf->l), &(rimg->buf_head)); - curr_buf = buf; - } - if (size && rimg->size == size) { - if (close_fd) - close(fd); - return rimg->size; - } - } else { - pr_perror("Read on %s:%s socket failed", - rimg->path, rimg->snapshot_id); - if (close_fd) - close(fd); - return -1; - } - } -} - -int64_t send_image(int fd, struct rimage *rimg, int flags, bool close_fd) -{ - - int n, nblocks = 0; - - if (flags != O_APPEND) { - rimg->curr_sent_buf = list_entry(rimg->buf_head.next, struct rbuf, l); - rimg->curr_sent_bytes = 0; - } - - while (1) { - n = send( - fd, - rimg->curr_sent_buf->buffer + rimg->curr_sent_bytes, - min(BUF_SIZE, rimg->curr_sent_buf->nbytes) - rimg->curr_sent_bytes, - MSG_NOSIGNAL); - if (n > -1) { - rimg->curr_sent_bytes += n; - if (rimg->curr_sent_bytes == BUF_SIZE) { - rimg->curr_sent_buf = - list_entry(rimg->curr_sent_buf->l.next, struct rbuf, l); - nblocks++; - rimg->curr_sent_bytes = 0; - } else if (rimg->curr_sent_bytes == rimg->curr_sent_buf->nbytes) { - if (close_fd) - close(fd); - return nblocks*BUF_SIZE + rimg->curr_sent_buf->nbytes; - } - } else if (errno == EPIPE || errno == ECONNRESET) { - pr_warn("Connection for %s:%s was closed early than expected\n", - rimg->path, rimg->snapshot_id); - return 0; - } else { - pr_perror("Write on %s:%s socket failed", - rimg->path, rimg->snapshot_id); - return -1; - } - } - -} diff --git a/criu/img-remote.c b/criu/img-remote.c index 2bf62d2a9a..f812c52d52 100644 --- a/criu/img-remote.c +++ b/criu/img-remote.c @@ -8,7 +8,6 @@ #include 
"xmalloc.h" #include "criu-log.h" #include "img-remote.h" -#include "img-remote-proto.h" #include "images/remote-image.pb-c.h" #include "protobuf-desc.h" #include @@ -16,11 +15,37 @@ #include "common/compiler.h" #include "cr_options.h" +#include +#include "sys/un.h" +#include +#include + +#include "protobuf.h" +#include "image.h" + #define PB_LOCAL_IMAGE_SIZE PATHLEN static char *snapshot_id; bool restoring = true; +LIST_HEAD(rimg_head); +pthread_mutex_t rimg_lock = PTHREAD_MUTEX_INITIALIZER; + +pthread_mutex_t proxy_to_cache_lock = PTHREAD_MUTEX_INITIALIZER; + +LIST_HEAD(workers_head); +pthread_mutex_t workers_lock = PTHREAD_MUTEX_INITIALIZER; +sem_t workers_semph; + +struct rimage * (*wait_for_image) (struct wthread *wt); + +bool finished = false; +int writing = 0; +int forwarding = 0; +int proxy_to_cache_fd; +int local_req_fd; + + LIST_HEAD(snapshot_head); /* A snapshot is a dump or pre-dump operation. Each snapshot is identified by an @@ -48,9 +73,699 @@ void add_snapshot(struct snapshot *snapshot) list_add_tail(&(snapshot->l), &snapshot_head); } +struct rimage *get_rimg_by_name(const char *snapshot_id, const char *path) +{ + struct rimage *rimg = NULL; + + pthread_mutex_lock(&rimg_lock); + list_for_each_entry(rimg, &rimg_head, l) { + if (!strncmp(rimg->path, path, PATHLEN) && + !strncmp(rimg->snapshot_id, snapshot_id, PATHLEN)) { + pthread_mutex_unlock(&rimg_lock); + return rimg; + } + } + pthread_mutex_unlock(&rimg_lock); + return NULL; +} + +static struct wthread *get_wt_by_name(const char *snapshot_id, const char *path) +{ + struct wthread *wt = NULL; + + pthread_mutex_lock(&workers_lock); + list_for_each_entry(wt, &workers_head, l) { + if (!strncmp(wt->path, path, PATHLEN) && + !strncmp(wt->snapshot_id, snapshot_id, PATHLEN)) { + pthread_mutex_unlock(&workers_lock); + return wt; + } + } + pthread_mutex_unlock(&workers_lock); + return NULL; +} + +static int init_sync_structures(void) +{ + if (sem_init(&workers_semph, 0, 0) != 0) { + pr_perror("Workers 
semaphore init failed"); + return -1; + } + + return 0; +} + +void prepare_recv_rimg(void) +{ + pthread_mutex_lock(&rimg_lock); + writing++; + pthread_mutex_unlock(&rimg_lock); +} + +void finalize_recv_rimg(struct rimage *rimg) +{ + + pthread_mutex_lock(&rimg_lock); + + if (rimg) + list_add_tail(&(rimg->l), &rimg_head); + writing--; + pthread_mutex_unlock(&rimg_lock); + /* Wake thread waiting for this image. */ + if (rimg) { + struct wthread *wt = get_wt_by_name(rimg->snapshot_id, rimg->path); + if (wt) + sem_post(&(wt->wakeup_sem)); + } +} + +bool is_receiving(void) +{ + int ret; + + pthread_mutex_lock(&rimg_lock); + ret = writing; + pthread_mutex_unlock(&rimg_lock); + return ret > 0; +} + +static void prepare_fwd_rimg(void) +{ + pthread_mutex_lock(&rimg_lock); + forwarding++; + pthread_mutex_unlock(&rimg_lock); +} + +static void finalize_fwd_rimg(void) +{ + pthread_mutex_lock(&rimg_lock); + forwarding--; + pthread_mutex_unlock(&rimg_lock); +} + +static bool is_forwarding(void) +{ + int ret; + + pthread_mutex_lock(&rimg_lock); + ret = forwarding; + pthread_mutex_unlock(&rimg_lock); + return ret > 0; +} + +/* This function is called when no more images are coming. Threads still waiting + * for images will be awaken to send a ENOENT (no such file) to the requester. 
+ */ +void unlock_workers(void) +{ + struct wthread *wt = NULL; + + pthread_mutex_lock(&workers_lock); + list_for_each_entry(wt, &workers_head, l) + sem_post(&(wt->wakeup_sem)); + pthread_mutex_unlock(&workers_lock); +} + +int init_daemon(bool background, struct rimage *(*wfi)(struct wthread*)) +{ + if (background) { + if (daemon(1, 0) == -1) { + pr_perror("Can't run service server in the background"); + return -1; + } + } + wait_for_image = wfi; + return init_sync_structures(); +} + +int setup_TCP_server_socket(int port) +{ + struct sockaddr_in serv_addr; + int sockopt = 1; + int sockfd = socket(AF_INET, SOCK_STREAM, 0); + + if (sockfd < 0) { + pr_perror("Unable to open image socket"); + return -1; + } + + bzero((char *) &serv_addr, sizeof(serv_addr)); + serv_addr.sin_family = AF_INET; + serv_addr.sin_addr.s_addr = INADDR_ANY; + serv_addr.sin_port = htons(port); + + if (setsockopt( + sockfd, SOL_SOCKET, SO_REUSEADDR, &sockopt, sizeof(sockopt)) == -1) { + pr_perror("Unable to set SO_REUSEADDR"); + goto err; + } + + if (bind(sockfd, (struct sockaddr *) &serv_addr, sizeof(serv_addr)) < 0) { + pr_perror("Unable to bind image socket"); + goto err; + } + + if (listen(sockfd, DEFAULT_LISTEN)) { + pr_perror("Unable to listen image socket"); + goto err; + } + + return sockfd; +err: + close(sockfd); + return -1; +} + +int setup_TCP_client_socket(char *hostname, int port) +{ + int sockfd; + struct sockaddr_in serv_addr; + struct hostent *server; + + sockfd = socket(AF_INET, SOCK_STREAM, 0); + if (sockfd < 0) { + pr_perror("Unable to open remote image socket"); + return -1; + } + + server = gethostbyname(hostname); + if (server == NULL) { + pr_perror("Unable to get host by name (%s)", hostname); + goto err; + } + + bzero((char *) &serv_addr, sizeof(serv_addr)); + serv_addr.sin_family = AF_INET; + bcopy((char *) server->h_addr, + (char *) &serv_addr.sin_addr.s_addr, + server->h_length); + serv_addr.sin_port = htons(port); + + if (connect(sockfd, (struct sockaddr *) &serv_addr, 
sizeof(serv_addr)) < 0) { + pr_perror("Unable to connect to remote %s", hostname); + goto err; + } + + return sockfd; +err: + close(sockfd); + return -1; +} + +int setup_UNIX_server_socket(char *path) +{ + struct sockaddr_un addr; + int sockfd = socket(AF_UNIX, SOCK_STREAM, 0); + + if (sockfd < 0) { + pr_perror("Unable to open image socket"); + return -1; + } + + memset(&addr, 0, sizeof(addr)); + addr.sun_family = AF_UNIX; + strncpy(addr.sun_path, path, sizeof(addr.sun_path)-1); + + unlink(path); + + if (bind(sockfd, (struct sockaddr *)&addr, sizeof(addr)) == -1) { + pr_perror("Unable to bind image socket"); + goto err; + } + + if (listen(sockfd, 50) == -1) { + pr_perror("Unable to listen image socket"); + goto err; + } + + return sockfd; +err: + close(sockfd); + return -1; +} + +static int setup_UNIX_client_socket(char *path) +{ + struct sockaddr_un addr; + int sockfd = socket(AF_UNIX, SOCK_STREAM, 0); + + if (sockfd < 0) { + pr_perror("Unable to open local image socket"); + return -1; + } + + memset(&addr, 0, sizeof(addr)); + addr.sun_family = AF_UNIX; + strncpy(addr.sun_path, path, sizeof(addr.sun_path)-1); + + if (connect(sockfd, (struct sockaddr *)&addr, sizeof(addr)) < 0) { + pr_perror("Unable to connect to local socket: %s", path); + close(sockfd); + return -1; + } + + return sockfd; +} + +static int64_t pb_write_obj(int fd, void *obj, int type) +{ + struct cr_img img; + + img._x.fd = fd; + bfd_setraw(&img._x); + return pb_write_one(&img, obj, type); +} + +static int64_t pb_read_obj(int fd, void **pobj, int type) +{ + struct cr_img img; + + img._x.fd = fd; + bfd_setraw(&img._x); + return do_pb_read_one(&img, pobj, type, true); +} + +static int64_t write_header(int fd, char *snapshot_id, char *path, int flags) +{ + LocalImageEntry li = LOCAL_IMAGE_ENTRY__INIT; + + li.name = path; + li.snapshot_id = snapshot_id; + li.open_mode = flags; + return pb_write_obj(fd, &li, PB_LOCAL_IMAGE); +} + +static int64_t write_reply_header(int fd, int error) +{ + 
LocalImageReplyEntry lir = LOCAL_IMAGE_REPLY_ENTRY__INIT; + + lir.error = error; + return pb_write_obj(fd, &lir, PB_LOCAL_IMAGE_REPLY); +} + +int64_t write_remote_header(int fd, char *snapshot_id, char *path, int flags, uint64_t size) +{ + RemoteImageEntry ri = REMOTE_IMAGE_ENTRY__INIT; + + ri.name = path; + ri.snapshot_id = snapshot_id; + ri.open_mode = flags; + ri.size = size; + return pb_write_obj(fd, &ri, PB_REMOTE_IMAGE); +} + +static int64_t read_header(int fd, char *snapshot_id, char *path, int *flags) +{ + LocalImageEntry *li; + int ret = pb_read_obj(fd, (void **)&li, PB_LOCAL_IMAGE); + + if (ret > 0) { + strncpy(snapshot_id, li->snapshot_id, PATHLEN); + strncpy(path, li->name, PATHLEN); + *flags = li->open_mode; + } + free(li); + return ret; +} + +static int64_t read_reply_header(int fd, int *error) +{ + LocalImageReplyEntry *lir; + int ret = pb_read_obj(fd, (void **)&lir, PB_LOCAL_IMAGE_REPLY); + + if (ret > 0) + *error = lir->error; + free(lir); + return ret; +} + +int64_t read_remote_header(int fd, char *snapshot_id, char *path, int *flags, uint64_t *size) +{ + RemoteImageEntry *ri; + int ret = pb_read_obj(fd, (void **)&ri, PB_REMOTE_IMAGE); + + if (ret > 0) { + strncpy(snapshot_id, ri->snapshot_id, PATHLEN); + strncpy(path, ri->name, PATHLEN); + *flags = ri->open_mode; + *size = ri->size; + } + free(ri); + return ret; +} + +static struct wthread *new_worker(void) +{ + struct wthread *wt = malloc(sizeof(struct wthread)); + + if (!wt) { + pr_perror("Unable to allocate worker thread structure"); + goto err; + } + if (sem_init(&(wt->wakeup_sem), 0, 0) != 0) { + pr_perror("Workers semaphore init failed"); + goto err; + } + return wt; +err: + free(wt); + return NULL; +} + +static void add_worker(struct wthread *wt) +{ + pthread_mutex_lock(&workers_lock); + list_add_tail(&(wt->l), &workers_head); + pthread_mutex_unlock(&workers_lock); + sem_post(&workers_semph); +} + +void join_workers(void) +{ + struct wthread *wthread = NULL; + + while (! 
list_empty(&workers_head)) { + wthread = list_entry(workers_head.next, struct wthread, l); + pthread_join(wthread->tid, NULL); + list_del(&(wthread->l)); + free(wthread); + } +} + +static struct rimage *new_remote_image(char *path, char *snapshot_id) +{ + struct rimage *rimg = malloc(sizeof(struct rimage)); + struct rbuf *buf = malloc(sizeof(struct rbuf)); + + if (rimg == NULL || buf == NULL) { + pr_perror("Unable to allocate remote image structures"); + goto err; + } + + strncpy(rimg->path, path, PATHLEN -1 ); + rimg->path[PATHLEN - 1] = '\0'; + strncpy(rimg->snapshot_id, snapshot_id, PATHLEN - 1); + rimg->snapshot_id[PATHLEN - 1] = '\0'; + rimg->size = 0; + buf->nbytes = 0; + INIT_LIST_HEAD(&(rimg->buf_head)); + list_add_tail(&(buf->l), &(rimg->buf_head)); + rimg->curr_sent_buf = list_entry(rimg->buf_head.next, struct rbuf, l); + rimg->curr_sent_bytes = 0; + + if (pthread_mutex_init(&(rimg->in_use), NULL) != 0) { + pr_err("Remote image in_use mutex init failed\n"); + goto err; + } + return rimg; +err: + free(rimg); + free(buf); + return NULL; +} + +/* Clears a remote image struct for reusing it. */ +static struct rimage *clear_remote_image(struct rimage *rimg) +{ + pthread_mutex_lock(&(rimg->in_use)); + + while (!list_is_singular(&(rimg->buf_head))) { + struct rbuf *buf = list_entry(rimg->buf_head.prev, struct rbuf, l); + + list_del(rimg->buf_head.prev); + free(buf); + } + + list_entry(rimg->buf_head.next, struct rbuf, l)->nbytes = 0; + rimg->size = 0; + rimg->curr_sent_buf = list_entry(rimg->buf_head.next, struct rbuf, l); + rimg->curr_sent_bytes = 0; + + pthread_mutex_unlock(&(rimg->in_use)); + + return rimg; +} + +struct rimage *prepare_remote_image(char *path, char *snapshot_id, int open_mode) +{ + struct rimage *rimg = get_rimg_by_name(snapshot_id, path); + /* There is no record of such image, create a new one. 
*/ + + if (rimg == NULL) + return new_remote_image(path, snapshot_id); + + pthread_mutex_lock(&rimg_lock); + list_del(&(rimg->l)); + pthread_mutex_unlock(&rimg_lock); + + /* There is already an image record. Simply return it for appending. */ + if (open_mode == O_APPEND) + return rimg; + /* There is already an image record. Clear it for writing. */ + else + return clear_remote_image(rimg); +} + +static void *process_local_read(struct wthread *wt) +{ + struct rimage *rimg = NULL; + int64_t ret; + /* TODO - split wait_for_image + * in cache - improve the parent stuf + * in proxy - do not wait for anything, return no file + */ + rimg = wait_for_image(wt); + if (!rimg) { + pr_info("No image %s:%s.\n", wt->path, wt->snapshot_id); + if (write_reply_header(wt->fd, ENOENT) < 0) + pr_perror("Error writing reply header for unexisting image"); + close(wt->fd); + return NULL; + } else { + if (write_reply_header(wt->fd, 0) < 0) { + pr_perror("Error writing reply header for %s:%s", + wt->path, wt->snapshot_id); + close(wt->fd); + return NULL; + } + } + + pthread_mutex_lock(&(rimg->in_use)); + ret = send_image(wt->fd, rimg, wt->flags, true); + if (ret < 0) + pr_perror("Unable to send %s:%s to CRIU (sent %ld bytes)", + rimg->path, rimg->snapshot_id, (long)ret); + else + pr_info("Finished sending %s:%s to CRIU (sent %ld bytes)\n", + rimg->path, rimg->snapshot_id, (long)ret); + pthread_mutex_unlock(&(rimg->in_use)); + return NULL; +} + +static void *process_local_image_connection(void *ptr) +{ + struct wthread *wt = (struct wthread *) ptr; + struct rimage *rimg = NULL; + int64_t ret; + + /* NOTE: the code inside this if is shared for both cache and proxy. */ + if (wt->flags == O_RDONLY) + return process_local_read(wt); + + /* NOTE: IMAGE PROXY ONLY. The image cache receives write connections + * through TCP (see accept_remote_image_connections). 
+ */ + rimg = prepare_remote_image(wt->path, wt->snapshot_id, wt->flags); + ret = recv_image(wt->fd, rimg, 0, wt->flags, true); + if (ret < 0) { + pr_perror("Unable to receive %s:%s to CRIU (received %ld bytes)", + rimg->path, rimg->snapshot_id, (long)ret); + finalize_recv_rimg(NULL); + return NULL; + } + finalize_recv_rimg(rimg); + pr_info("Finished receiving %s:%s (received %ld bytes)\n", + rimg->path, rimg->snapshot_id, (long)ret); + + + if (!strncmp(rimg->path, DUMP_FINISH, sizeof(DUMP_FINISH))) { + finished = true; + shutdown(local_req_fd, SHUT_RD); + } else { + pthread_mutex_lock(&proxy_to_cache_lock); + ret = forward_image(rimg); + pthread_mutex_unlock(&proxy_to_cache_lock); + } + + finalize_fwd_rimg(); + if (ret < 0) { + pr_perror("Unable to forward %s:%s to Image Cache", + rimg->path, rimg->snapshot_id); + + return NULL; + } + + if (finished && !is_forwarding() && !is_receiving()) { + pr_info("Closing connection to Image Cache.\n"); + close(proxy_to_cache_fd); + unlock_workers(); + } + return NULL; +} + + +void *accept_local_image_connections(void *port) +{ + int fd = *((int *) port); + int cli_fd; + struct sockaddr_in cli_addr; + + socklen_t clilen = sizeof(cli_addr); + pthread_t tid; + struct wthread *wt; + + while (1) { + cli_fd = accept(fd, (struct sockaddr *) &cli_addr, &clilen); + if (cli_fd < 0) { + if (!finished) + pr_perror("Unable to accept local image connection"); + close(cli_fd); + return NULL; + } + + wt = new_worker(); + wt->fd = cli_fd; + + if (read_header(wt->fd, wt->snapshot_id, wt->path, &(wt->flags)) < 0) { + pr_err("Error reading local image header\n"); + goto err; + } + + pr_info("Received %s request for %s:%s\n", + wt->flags == O_RDONLY ? "read" : + wt->flags == O_APPEND ? "append" : "write", + wt->path, wt->snapshot_id); + + /* These function calls are used to avoid other threads from + * thinking that there are no more images are coming. 
+ */ + if (wt->flags != O_RDONLY) { + prepare_recv_rimg(); + prepare_fwd_rimg(); + } + + if (pthread_create( + &tid, NULL, process_local_image_connection, (void *) wt)) { + pr_perror("Unable to create worker thread"); + goto err; + } + + wt->tid = tid; + add_worker(wt); + } +err: + close(cli_fd); + free(wt); + return NULL; +} + +/* Note: size is a limit on how much we want to read from the socket. Zero means + * read until the socket is closed. + */ +int64_t recv_image(int fd, struct rimage *rimg, uint64_t size, int flags, bool close_fd) +{ + struct rbuf *curr_buf = NULL; + int n; + + if (flags == O_APPEND) + curr_buf = list_entry(rimg->buf_head.prev, struct rbuf, l); + else + curr_buf = list_entry(rimg->buf_head.next, struct rbuf, l); + + while (1) { + n = read(fd, + curr_buf->buffer + curr_buf->nbytes, + size ? + min((int) (size - rimg->size), BUF_SIZE - curr_buf->nbytes) : + BUF_SIZE - curr_buf->nbytes); + if (n == 0) { + if (close_fd) + close(fd); + return rimg->size; + } else if (n > 0) { + curr_buf->nbytes += n; + rimg->size += n; + if (curr_buf->nbytes == BUF_SIZE) { + struct rbuf *buf = malloc(sizeof(struct rbuf)); + if (buf == NULL) { + pr_perror("Unable to allocate remote_buffer structures"); + if (close_fd) + close(fd); + return -1; + } + buf->nbytes = 0; + list_add_tail(&(buf->l), &(rimg->buf_head)); + curr_buf = buf; + } + if (size && rimg->size == size) { + if (close_fd) + close(fd); + return rimg->size; + } + } else { + pr_perror("Read on %s:%s socket failed", + rimg->path, rimg->snapshot_id); + if (close_fd) + close(fd); + return -1; + } + } +} + +int64_t send_image(int fd, struct rimage *rimg, int flags, bool close_fd) +{ + + int n, nblocks = 0; + + if (flags != O_APPEND) { + rimg->curr_sent_buf = list_entry(rimg->buf_head.next, struct rbuf, l); + rimg->curr_sent_bytes = 0; + } + + while (1) { + n = send( + fd, + rimg->curr_sent_buf->buffer + rimg->curr_sent_bytes, + min(BUF_SIZE, rimg->curr_sent_buf->nbytes) - rimg->curr_sent_bytes, + 
MSG_NOSIGNAL); + if (n > -1) { + rimg->curr_sent_bytes += n; + if (rimg->curr_sent_bytes == BUF_SIZE) { + rimg->curr_sent_buf = + list_entry(rimg->curr_sent_buf->l.next, struct rbuf, l); + nblocks++; + rimg->curr_sent_bytes = 0; + } else if (rimg->curr_sent_bytes == rimg->curr_sent_buf->nbytes) { + if (close_fd) + close(fd); + return nblocks*BUF_SIZE + rimg->curr_sent_buf->nbytes; + } + } else if (errno == EPIPE || errno == ECONNRESET) { + pr_warn("Connection for %s:%s was closed early than expected\n", + rimg->path, rimg->snapshot_id); + return 0; + } else { + pr_perror("Write on %s:%s socket failed", + rimg->path, rimg->snapshot_id); + return -1; + } + } + +} + int read_remote_image_connection(char *snapshot_id, char *path) { - int error; + int error = 0; int sockfd = setup_UNIX_client_socket(restoring ? DEFAULT_CACHE_SOCKET: DEFAULT_PROXY_SOCKET); if (sockfd < 0) { diff --git a/criu/include/img-remote-proto.h b/criu/include/img-remote-proto.h deleted file mode 100644 index 13cf6c6d25..0000000000 --- a/criu/include/img-remote-proto.h +++ /dev/null @@ -1,88 +0,0 @@ -#ifndef IMAGE_REMOTE_PVT_H -#define IMAGE_REMOTE_PVT_H - -#include -#include -#include "common/list.h" -#include "img-remote.h" -#include -#include - -#define DEFAULT_LISTEN 50 -#ifndef PAGESIZE -#define PAGESIZE 4096 -#endif -#define BUF_SIZE PAGESIZE - -struct rbuf { - char buffer[BUF_SIZE]; - int nbytes; /* How many bytes are in the buffer. */ - struct list_head l; -}; - -struct rimage { - char path[PATHLEN]; - char snapshot_id[PATHLEN]; - struct list_head l; - struct list_head buf_head; - /* Used to track already sent buffers when the image is appended. */ - struct rbuf *curr_sent_buf; - /* Similar to the previous field. Number of bytes sent in 'curr_sent_buf'. */ - int curr_sent_bytes; - uint64_t size; /* number of bytes */ - pthread_mutex_t in_use; /* Only one operation at a time, per image. */ -}; - -struct wthread { - pthread_t tid; - struct list_head l; - /* Client fd. 
*/ - int fd; - /* The path and snapshot_id identify the request handled by this thread. */ - char path[PATHLEN]; - char snapshot_id[PATHLEN]; - int flags; - /* This semph is used to wake this thread if the image is in memory.*/ - sem_t wakeup_sem; -}; - -/* This variable is used to indicate when the dump is finished. */ -extern bool finished; -/* This is the proxy to cache TCP socket FD. */ -extern int proxy_to_cache_fd; -/* This the unix socket used to fulfill local requests. */ -extern int local_req_fd; - -int init_daemon(bool background, struct rimage *(*wfi)(struct wthread*)); - -void join_workers(void); -void unlock_workers(void); - -void prepare_recv_rimg(void); -void finalize_recv_rimg(struct rimage *rimg); -struct rimage *prepare_remote_image(char *path, char *namesapce, int flags); -struct rimage *get_rimg_by_name(const char *snapshot_id, const char *path); -bool is_receiving(void); - -void *accept_local_image_connections(void *ptr); -void *accept_remote_image_connections(void *ptr); - -int64_t forward_image(struct rimage *rimg); -int64_t send_image(int fd, struct rimage *rimg, int flags, bool image_check); -int64_t recv_image(int fd, struct rimage *rimg, uint64_t size, int flags, bool image_check); - -int64_t pb_write_obj(int fd, void *obj, int type); -int64_t pb_read_obj(int fd, void **obj, int type); - -int64_t write_header(int fd, char *snapshot_id, char *path, int open_mode); -int64_t read_header(int fd, char *snapshot_id, char *path, int *open_mode); -int64_t write_reply_header(int fd, int error); -int64_t read_reply_header(int fd, int *error); -int64_t read_remote_header(int fd, char *snapshot_id, char *path, int *open_mode, uint64_t *size); -int64_t write_remote_header(int fd, char *snapshot_id, char *path, int open_mode, uint64_t size); - -int setup_TCP_server_socket(int port); -int setup_TCP_client_socket(char *hostname, int port); -int setup_UNIX_client_socket(char *path); -int setup_UNIX_server_socket(char *path); -#endif diff --git 
a/criu/include/img-remote.h b/criu/include/img-remote.h index 38acbd26de..1771d310b5 100644 --- a/criu/include/img-remote.h +++ b/criu/include/img-remote.h @@ -1,6 +1,11 @@ #include #include +#include +#include "common/list.h" +#include +#include + #ifndef IMAGE_REMOTE_H #define IMAGE_REMOTE_H @@ -14,6 +19,76 @@ #define DEFAULT_CACHE_PORT 9996 #define DEFAULT_CACHE_HOST "localhost" +#define DEFAULT_LISTEN 50 +#ifndef PAGESIZE +#define PAGESIZE 4096 +#endif +#define BUF_SIZE PAGESIZE + +struct rbuf { + char buffer[BUF_SIZE]; + int nbytes; /* How many bytes are in the buffer. */ + struct list_head l; +}; + +struct rimage { + char path[PATHLEN]; + char snapshot_id[PATHLEN]; + struct list_head l; + struct list_head buf_head; + /* Used to track already sent buffers when the image is appended. */ + struct rbuf *curr_sent_buf; + /* Similar to the previous field. Number of bytes sent in 'curr_sent_buf'. */ + int curr_sent_bytes; + uint64_t size; /* number of bytes */ + pthread_mutex_t in_use; /* Only one operation at a time, per image. */ +}; + +struct wthread { + pthread_t tid; + struct list_head l; + /* Client fd. */ + int fd; + /* The path and snapshot_id identify the request handled by this thread. */ + char path[PATHLEN]; + char snapshot_id[PATHLEN]; + int flags; + /* This semph is used to wake this thread if the image is in memory.*/ + sem_t wakeup_sem; +}; + +/* This variable is used to indicate when the dump is finished. */ +extern bool finished; +/* This is the proxy to cache TCP socket FD. */ +extern int proxy_to_cache_fd; +/* This the unix socket used to fulfill local requests. 
*/ +extern int local_req_fd; + +int init_daemon(bool background, struct rimage *(*wfi)(struct wthread*)); + +void join_workers(void); +void unlock_workers(void); + +void prepare_recv_rimg(void); +void finalize_recv_rimg(struct rimage *rimg); +struct rimage *prepare_remote_image(char *path, char *namesapce, int flags); +struct rimage *get_rimg_by_name(const char *snapshot_id, const char *path); +bool is_receiving(void); + +void *accept_local_image_connections(void *ptr); +void *accept_remote_image_connections(void *ptr); + +int64_t forward_image(struct rimage *rimg); +int64_t send_image(int fd, struct rimage *rimg, int flags, bool image_check); +int64_t recv_image(int fd, struct rimage *rimg, uint64_t size, int flags, bool image_check); + +int64_t read_remote_header(int fd, char *snapshot_id, char *path, int *open_mode, uint64_t *size); +int64_t write_remote_header(int fd, char *snapshot_id, char *path, int open_mode, uint64_t size); + +int setup_TCP_server_socket(int port); +int setup_TCP_client_socket(char *hostname, int port); +int setup_UNIX_server_socket(char *path); + /* Called by restore to get the fd correspondent to a particular path. This call * will block until the connection is received. */ From a7894a69a4923a01258179a8017b7e1826182413 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 6 Jul 2017 12:38:13 +0300 Subject: [PATCH 016/277] page-read: Don't try to dedup from img cache/proxy It's simply impossible (yet), so emit a warning. 
Acked-by: Mike Rapoport Signed-off-by: Pavel Emelyanov Signed-off-by: Andrei Vagin --- criu/pagemap.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/criu/pagemap.c b/criu/pagemap.c index 056a4dc660..17260151f9 100644 --- a/criu/pagemap.c +++ b/criu/pagemap.c @@ -425,11 +425,8 @@ static int maybe_read_page_img_cache(struct page_read *pr, unsigned long vaddr, break; } - if (opts.auto_dedup) { - ret = punch_hole(pr, pr->pi_off, len, false); - if (ret == -1) - return -1; - } + if (opts.auto_dedup) + pr_warn_once("Can't dedup from image cache\n"); if (ret == 0 && pr->io_complete) ret = pr->io_complete(pr, vaddr, nr); From f851c24154cfe490ec089b3bdb46aa5e5ca41a57 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 6 Jul 2017 12:38:30 +0300 Subject: [PATCH 017/277] page-read: Don't check for cache/proxy in local case The opts.remote is always false in this code. Acked-by: Mike Rapoport Signed-off-by: Pavel Emelyanov Signed-off-by: Andrei Vagin --- criu/pagemap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/pagemap.c b/criu/pagemap.c index 17260151f9..854182f76f 100644 --- a/criu/pagemap.c +++ b/criu/pagemap.c @@ -392,7 +392,7 @@ static int maybe_read_page_local(struct page_read *pr, unsigned long vaddr, * for us for urgent async read, just do the regular * cached read. 
*/ - if ((flags & (PR_ASYNC|PR_ASAP)) == PR_ASYNC && !opts.remote) + if ((flags & (PR_ASYNC|PR_ASAP)) == PR_ASYNC) ret = pagemap_enqueue_iovec(pr, buf, len, &pr->async); else { ret = read_local_page(pr, vaddr, len, buf); From be8c403bc089a8a84dc898aaa0928f795a9352bd Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 6 Jul 2017 12:38:48 +0300 Subject: [PATCH 018/277] page-read: Warn about async read w/o completion cb Acked-by: Mike Rapoport Signed-off-by: Pavel Emelyanov Signed-off-by: Andrei Vagin --- criu/pagemap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/criu/pagemap.c b/criu/pagemap.c index 854182f76f..3c3930b7b5 100644 --- a/criu/pagemap.c +++ b/criu/pagemap.c @@ -449,6 +449,8 @@ static int read_page_complete(unsigned long img_id, unsigned long vaddr, int nr_ if (pr->io_complete) ret = pr->io_complete(pr, vaddr, nr_pages); + else + pr_warn_once("Remote page read w/o io_complete!\n"); return ret; } From 2dfec7f5645e9efc3a5fd06ac38239315a1b9c1c Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 6 Jul 2017 12:40:39 +0300 Subject: [PATCH 019/277] page-xfer: Normalize remote/local parent xfer checks We have two places to check for parent via page server -- as a part of _OPEN req and explicit req. Make the latter code be in-sync with the opening one. 
Acked-by: Mike Rapoport Signed-off-by: Pavel Emelyanov Signed-off-by: Andrei Vagin --- criu/page-xfer.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/criu/page-xfer.c b/criu/page-xfer.c index b826861e88..3e89feed9e 100644 --- a/criu/page-xfer.c +++ b/criu/page-xfer.c @@ -550,16 +550,12 @@ int check_parent_local_xfer(int fd_type, unsigned long img_id) struct stat st; int ret, pfd; - if (opts.remote) { - pfd = get_curr_parent_snapshot_id_idx(); - pr_err("Unable to get parent snapshot id\n"); - if (pfd == -1) - return -1; - } else { - pfd = openat(get_service_fd(IMG_FD_OFF), CR_PARENT_LINK, O_RDONLY); - if (pfd < 0 && errno == ENOENT) - return 0; - } + if (opts.remote) + return get_curr_parent_snapshot_id_idx() == -1 ? 0 : 1; + + pfd = openat(get_service_fd(IMG_FD_OFF), CR_PARENT_LINK, O_RDONLY); + if (pfd < 0 && errno == ENOENT) + return 0; snprintf(path, sizeof(path), imgset_template[fd_type].fmt, img_id); ret = fstatat(pfd, path, &st, 0); @@ -622,8 +618,6 @@ int check_parent_page_xfer(int fd_type, unsigned long img_id) { if (opts.use_page_server) return check_parent_server_xfer(fd_type, img_id); - else if (opts.remote) - return get_curr_parent_snapshot_id_idx() == -1 ? 
0 : 1; else return check_parent_local_xfer(fd_type, img_id); } From 30e6dd6480b43bacce866e0702e595c341729a47 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 15 Aug 2017 18:28:13 +0300 Subject: [PATCH 020/277] files,remote: Support chunked ghost files Those may not support sendfiles, so use read/write-s instead Signed-off-by: Pavel Emelyanov --- criu/files-reg.c | 61 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 49 insertions(+), 12 deletions(-) diff --git a/criu/files-reg.c b/criu/files-reg.c index 2f68bc03fe..97dc7030eb 100644 --- a/criu/files-reg.c +++ b/criu/files-reg.c @@ -155,11 +155,30 @@ static int copy_chunk_from_file(int fd, int img, off_t off, size_t len) char *buf = NULL; int ret; - while (len > 0) { - ret = sendfile(img, fd, &off, len); - if (ret <= 0) { - pr_perror("Can't send ghost to image"); + if (opts.remote) { + buf = xmalloc(BUFSIZE); + if (!buf) return -1; + } + + while (len > 0) { + if (opts.remote) { + ret = pread(fd, buf, min_t(size_t, BUFSIZE, len), off); + if (ret <= 0) { + pr_perror("Can't read from ghost file"); + return -1; + } + if (write(img, buf, ret) != ret) { + pr_perror("Can't write to image"); + return -1; + } + off += ret; + } else { + ret = sendfile(img, fd, &off, len); + if (ret <= 0) { + pr_perror("Can't send ghost to image"); + return -1; + } } len -= ret; @@ -214,15 +233,33 @@ static int copy_chunk_to_file(int img, int fd, off_t off, size_t len) char *buf = NULL; int ret; - while (len > 0) { - if (lseek(fd, off, SEEK_SET) < 0) { - pr_perror("Can't seek file"); - return -1; - } - ret = sendfile(fd, img, NULL, len); - if (ret < 0) { - pr_perror("Can't send data"); + if (opts.remote) { + buf = xmalloc(BUFSIZE); + if (!buf) return -1; + } + + while (len > 0) { + if (opts.remote) { + ret = read(img, buf, min_t(size_t, BUFSIZE, len)); + if (ret <= 0) { + pr_perror("Can't read from image"); + return -1; + } + if (pwrite(fd, buf, ret, off) != ret) { + pr_perror("Can't write to file"); + return -1; + } + 
} else { + if (lseek(fd, off, SEEK_SET) < 0) { + pr_perror("Can't seek file"); + return -1; + } + ret = sendfile(fd, img, NULL, len); + if (ret < 0) { + pr_perror("Can't send data"); + return -1; + } } off += ret; From 860b6e6119be1d1b77ca7e9593310e47bb33b097 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sat, 20 Jan 2018 19:07:26 +0000 Subject: [PATCH 021/277] img-remote: Fix typo in comment Signed-off-by: Radostin Stoyanov --- criu/img-remote.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/criu/img-remote.c b/criu/img-remote.c index f812c52d52..91e18a1a2f 100644 --- a/criu/img-remote.c +++ b/criu/img-remote.c @@ -49,7 +49,7 @@ int local_req_fd; LIST_HEAD(snapshot_head); /* A snapshot is a dump or pre-dump operation. Each snapshot is identified by an - * ID which corresponds to the working directory specefied by the user. + * ID which corresponds to the working directory specified by the user. */ struct snapshot { char snapshot_id[PATHLEN]; @@ -531,7 +531,7 @@ static void *process_local_read(struct wthread *wt) struct rimage *rimg = NULL; int64_t ret; /* TODO - split wait_for_image - * in cache - improve the parent stuf + * in cache - improve the parent stuff * in proxy - do not wait for anything, return no file */ rimg = wait_for_image(wt); From fe0147b188b9961e88d9eda06365e583b35d72c5 Mon Sep 17 00:00:00 2001 From: Rodrigo Bruno Date: Mon, 14 May 2018 01:29:47 +0100 Subject: [PATCH 022/277] remote: Preparing image receive and send for asynchronous sockets. 
--- criu/img-remote.c | 93 +++++++++++++++++++++++++++++---------- criu/include/img-remote.h | 28 ++++++++++-- 2 files changed, 93 insertions(+), 28 deletions(-) diff --git a/criu/img-remote.c b/criu/img-remote.c index 91e18a1a2f..7242b0c8de 100644 --- a/criu/img-remote.c +++ b/criu/img-remote.c @@ -470,8 +470,6 @@ static struct rimage *new_remote_image(char *path, char *snapshot_id) buf->nbytes = 0; INIT_LIST_HEAD(&(rimg->buf_head)); list_add_tail(&(buf->l), &(rimg->buf_head)); - rimg->curr_sent_buf = list_entry(rimg->buf_head.next, struct rbuf, l); - rimg->curr_sent_bytes = 0; if (pthread_mutex_init(&(rimg->in_use), NULL) != 0) { pr_err("Remote image in_use mutex init failed\n"); @@ -498,8 +496,6 @@ static struct rimage *clear_remote_image(struct rimage *rimg) list_entry(rimg->buf_head.next, struct rbuf, l)->nbytes = 0; rimg->size = 0; - rimg->curr_sent_buf = list_entry(rimg->buf_head.next, struct rbuf, l); - rimg->curr_sent_bytes = 0; pthread_mutex_unlock(&(rimg->in_use)); @@ -669,18 +665,43 @@ void *accept_local_image_connections(void *port) return NULL; } + +int64_t recv_image(int fd, struct rimage *rimg, uint64_t size, int flags, bool close_fd) +{ + int ret; + struct roperation *op = malloc(sizeof(struct roperation)); + bzero(op, sizeof(struct roperation)); + op->fd = fd; + op->rimg = rimg; + op->size = size; + op->flags = flags; + op->close_fd = close_fd; + op->curr_recv_buf = list_entry(rimg->buf_head.next, struct rbuf, l); + while ((ret = recv_image_async(op)) < 0) + if (ret != EAGAIN && ret != EWOULDBLOCK) + return -1; + return ret; +} + /* Note: size is a limit on how much we want to read from the socket. Zero means * read until the socket is closed. 
*/ -int64_t recv_image(int fd, struct rimage *rimg, uint64_t size, int flags, bool close_fd) +int64_t recv_image_async(struct roperation *op) { - struct rbuf *curr_buf = NULL; + int fd = op->fd; + struct rimage *rimg = op->rimg; + uint64_t size = op->size; + int flags = op->flags; + bool close_fd = op->close_fd; + struct rbuf *curr_buf = op->curr_recv_buf; int n; - if (flags == O_APPEND) - curr_buf = list_entry(rimg->buf_head.prev, struct rbuf, l); - else - curr_buf = list_entry(rimg->buf_head.next, struct rbuf, l); + if (curr_buf == NULL) { + if (flags == O_APPEND) + curr_buf = list_entry(rimg->buf_head.prev, struct rbuf, l); + else + curr_buf = list_entry(rimg->buf_head.next, struct rbuf, l); + } while (1) { n = read(fd, @@ -712,6 +733,8 @@ int64_t recv_image(int fd, struct rimage *rimg, uint64_t size, int flags, bool c close(fd); return rimg->size; } + } else if (errno == EAGAIN || errno == EWOULDBLOCK) { + return errno; } else { pr_perror("Read on %s:%s socket failed", rimg->path, rimg->snapshot_id); @@ -724,37 +747,59 @@ int64_t recv_image(int fd, struct rimage *rimg, uint64_t size, int flags, bool c int64_t send_image(int fd, struct rimage *rimg, int flags, bool close_fd) { + int ret; + struct roperation *op = malloc(sizeof(struct roperation)); + bzero(op, sizeof(struct roperation)); + op->fd = fd; + op->rimg = rimg; + op->flags = flags; + op->close_fd = close_fd; + op->curr_sent_buf = list_entry(rimg->buf_head.next, struct rbuf, l); + while ((ret = send_image_async(op)) < 0) + if (ret != EAGAIN && ret != EWOULDBLOCK) + return -1; + return ret; +} - int n, nblocks = 0; +int64_t send_image_async(struct roperation *op) +{ + int fd = op->fd; + struct rimage *rimg = op->rimg; + int flags = op->flags; + bool close_fd = op->close_fd; + int n; if (flags != O_APPEND) { - rimg->curr_sent_buf = list_entry(rimg->buf_head.next, struct rbuf, l); - rimg->curr_sent_bytes = 0; + op->curr_sent_buf = list_entry(rimg->buf_head.next, struct rbuf, l); + op->curr_sent_bytes = 0; } 
while (1) { n = send( fd, - rimg->curr_sent_buf->buffer + rimg->curr_sent_bytes, - min(BUF_SIZE, rimg->curr_sent_buf->nbytes) - rimg->curr_sent_bytes, + op->curr_sent_buf->buffer + op->curr_sent_bytes, + min(BUF_SIZE, op->curr_sent_buf->nbytes) - op->curr_sent_bytes, MSG_NOSIGNAL); if (n > -1) { - rimg->curr_sent_bytes += n; - if (rimg->curr_sent_bytes == BUF_SIZE) { - rimg->curr_sent_buf = - list_entry(rimg->curr_sent_buf->l.next, struct rbuf, l); - nblocks++; - rimg->curr_sent_bytes = 0; - } else if (rimg->curr_sent_bytes == rimg->curr_sent_buf->nbytes) { + op->curr_sent_bytes += n; + if (op->curr_sent_bytes == BUF_SIZE) { + op->curr_sent_buf = + list_entry(op->curr_sent_buf->l.next, struct rbuf, l); + op->nblocks++; + op->curr_sent_bytes = 0; + } else if (op->curr_sent_bytes == op->curr_sent_buf->nbytes) { if (close_fd) close(fd); - return nblocks*BUF_SIZE + rimg->curr_sent_buf->nbytes; + return op->nblocks*BUF_SIZE + op->curr_sent_buf->nbytes; } } else if (errno == EPIPE || errno == ECONNRESET) { pr_warn("Connection for %s:%s was closed early than expected\n", rimg->path, rimg->snapshot_id); return 0; - } else { + } else if (errno == EAGAIN || errno == EWOULDBLOCK) { + return errno; + } + else { pr_perror("Write on %s:%s socket failed", rimg->path, rimg->snapshot_id); return -1; diff --git a/criu/include/img-remote.h b/criu/include/img-remote.h index 1771d310b5..0947e7f0ca 100644 --- a/criu/include/img-remote.h +++ b/criu/include/img-remote.h @@ -36,10 +36,6 @@ struct rimage { char snapshot_id[PATHLEN]; struct list_head l; struct list_head buf_head; - /* Used to track already sent buffers when the image is appended. */ - struct rbuf *curr_sent_buf; - /* Similar to the previous field. Number of bytes sent in 'curr_sent_buf'. */ - int curr_sent_bytes; uint64_t size; /* number of bytes */ pthread_mutex_t in_use; /* Only one operation at a time, per image. 
*/ }; @@ -57,6 +53,28 @@ struct wthread { sem_t wakeup_sem; }; +/* Structure that describes the state of a remote operation on remote images. */ +struct roperation { + /* File descriptor being used. */ + int fd; + /* Remote image being used. */ + struct rimage *rimg; + /* Flags for the operation. */ + int flags; + /* If fd should be closed when the operation is done. */ + bool close_fd; + /* Note: recv operation only. How much bytes should be received. */ + uint64_t size; + /* Note: recv operation only. Buffer being writen. */ + struct rbuf *curr_recv_buf; + /* Note: send operation only. Number of blocks already sent. */ + int nblocks; + /* Note: send operation only. Pointer to buffer being sent. */ + struct rbuf *curr_sent_buf; + /* Note: send operation only. Number of bytes sent in 'curr_send_buf. */ + uint64_t curr_sent_bytes; +}; + /* This variable is used to indicate when the dump is finished. */ extern bool finished; /* This is the proxy to cache TCP socket FD. */ @@ -80,7 +98,9 @@ void *accept_remote_image_connections(void *ptr); int64_t forward_image(struct rimage *rimg); int64_t send_image(int fd, struct rimage *rimg, int flags, bool image_check); +int64_t send_image_async(struct roperation *op); int64_t recv_image(int fd, struct rimage *rimg, uint64_t size, int flags, bool image_check); +int64_t recv_image_async(struct roperation *op); int64_t read_remote_header(int fd, char *snapshot_id, char *path, int *open_mode, uint64_t *size); int64_t write_remote_header(int fd, char *snapshot_id, char *path, int open_mode, uint64_t size); From b23ae7e94f861ab0561731fc611bce05919745ea Mon Sep 17 00:00:00 2001 From: Rodrigo Bruno Date: Mon, 14 May 2018 01:29:48 +0100 Subject: [PATCH 023/277] remote: Unix socket for local connections is async. 
--- criu/img-cache.c | 2 + criu/img-proxy.c | 2 + criu/img-remote.c | 158 ++++++++++++++++++++++++++++---------- criu/include/img-remote.h | 1 + 4 files changed, 124 insertions(+), 39 deletions(-) diff --git a/criu/img-cache.c b/criu/img-cache.c index 7020a30f02..7b828b9b63 100644 --- a/criu/img-cache.c +++ b/criu/img-cache.c @@ -125,6 +125,8 @@ int image_cache(bool background, char *local_cache_path, unsigned short cache_wr return -1; } + socket_set_non_blocking(local_req_fd); + if (init_daemon(background, wait_for_image)) { pr_perror("Unable to initialize daemon"); return -1; diff --git a/criu/img-proxy.c b/criu/img-proxy.c index f56073b4e4..b63d69a033 100644 --- a/criu/img-proxy.c +++ b/criu/img-proxy.c @@ -60,6 +60,8 @@ int image_proxy(bool background, char *local_proxy_path, char *fwd_host, unsigne return -1; } + socket_set_non_blocking(local_req_fd); + if (opts.ps_socket != -1) { proxy_to_cache_fd = opts.ps_socket; pr_info("Re-using ps socket %d\n", proxy_to_cache_fd); diff --git a/criu/img-remote.c b/criu/img-remote.c index 7242b0c8de..ed8587996f 100644 --- a/criu/img-remote.c +++ b/criu/img-remote.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include "xmalloc.h" @@ -24,6 +25,7 @@ #include "image.h" #define PB_LOCAL_IMAGE_SIZE PATHLEN +#define EPOLL_MAX_EVENTS 50 static char *snapshot_id; bool restoring = true; @@ -236,6 +238,8 @@ int setup_TCP_server_socket(int port) return -1; } + + int setup_TCP_client_socket(char *hostname, int port) { int sockfd; @@ -272,10 +276,33 @@ int setup_TCP_client_socket(char *hostname, int port) return -1; } +int event_set(int epoll_fd, int op, int fd, uint32_t events, void *data) +{ + struct epoll_event event; + event.events = events; + event.data.ptr = data; + // TODO - check if this is okay to send a stack allocated object! 
+ return epoll_ctl(epoll_fd, op, fd, &event); +} + +void socket_set_non_blocking(int fd) +{ + int flags = fcntl(fd, F_GETFL, NULL); + + if (flags < 0) { + pr_perror("Failed to obtain flags from fd %d", fd); + return; + } + flags |= O_NONBLOCK; + + if (fcntl(fd, F_SETFL, flags) < 0) + pr_perror("Failed to set flags for fd %d", fd); +} + int setup_UNIX_server_socket(char *path) { struct sockaddr_un addr; - int sockfd = socket(AF_UNIX, SOCK_STREAM, 0); + int sockfd = socket(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0); if (sockfd < 0) { pr_perror("Unable to open image socket"); @@ -609,59 +636,111 @@ static void *process_local_image_connection(void *ptr) return NULL; } - -void *accept_local_image_connections(void *port) +void handle_local_accept(int fd) { - int fd = *((int *) port); + struct wthread *wt = NULL; int cli_fd; + pthread_t tid; struct sockaddr_in cli_addr; - socklen_t clilen = sizeof(cli_addr); - pthread_t tid; - struct wthread *wt; - while (1) { - cli_fd = accept(fd, (struct sockaddr *) &cli_addr, &clilen); - if (cli_fd < 0) { - if (!finished) - pr_perror("Unable to accept local image connection"); - close(cli_fd); - return NULL; - } + cli_fd = accept(fd, (struct sockaddr *) &cli_addr, &clilen); + if (cli_fd < 0) { + pr_perror("Unable to accept local image connection"); + goto err; + } - wt = new_worker(); - wt->fd = cli_fd; + wt = new_worker(); + wt->fd = cli_fd; - if (read_header(wt->fd, wt->snapshot_id, wt->path, &(wt->flags)) < 0) { - pr_err("Error reading local image header\n"); - goto err; - } + if (read_header(wt->fd, wt->snapshot_id, wt->path, &(wt->flags)) < 0) { + pr_err("Error reading local image header\n"); + goto err; + } + + /* These function calls are used to avoid other threads from + * thinking that there are no more images are coming. + */ + if (wt->flags != O_RDONLY) { + prepare_recv_rimg(); + prepare_fwd_rimg(); + } - pr_info("Received %s request for %s:%s\n", - wt->flags == O_RDONLY ? 
"read" : + pr_info("Received %s request for %s:%s\n", + wt->flags == O_RDONLY ? "read" : wt->flags == O_APPEND ? "append" : "write", - wt->path, wt->snapshot_id); - - /* These function calls are used to avoid other threads from - * thinking that there are no more images are coming. - */ - if (wt->flags != O_RDONLY) { - prepare_recv_rimg(); - prepare_fwd_rimg(); - } + wt->path, wt->snapshot_id); - if (pthread_create( - &tid, NULL, process_local_image_connection, (void *) wt)) { - pr_perror("Unable to create worker thread"); - goto err; - } - wt->tid = tid; - add_worker(wt); + if (pthread_create( + &tid, NULL, process_local_image_connection, (void *) wt)) { + pr_perror("Unable to create worker thread"); + goto err; } + wt->tid = tid; + add_worker(wt); + return; err: close(cli_fd); free(wt); +} + + +void *accept_local_image_connections(void *port) +{ + int fd = *((int *) port); + int epoll_fd; + struct epoll_event *events; + int ret; + + epoll_fd = epoll_create(EPOLL_MAX_EVENTS); + if (epoll_fd < 0) { + pr_perror("Unable to open epoll"); + return NULL; + } + + events = calloc(EPOLL_MAX_EVENTS, sizeof(struct epoll_event)); + if (events == NULL) { + pr_perror("Failed to allocated epoll events"); + goto end; + } + + ret = event_set(epoll_fd, EPOLL_CTL_ADD, fd, EPOLLIN, &fd); + if (ret) { + pr_perror("Failed to set event for epoll"); + goto end; + } + + while (1) { + int n_events = epoll_wait(epoll_fd, events, EPOLL_MAX_EVENTS, -1); + if (n_events < 0) { + pr_perror("Failed to epoll wait"); + goto end; + } + + for (int i = 0; i < n_events; i++) { + if (events[i].data.ptr == &fd) { + if ( events[i].events & EPOLLHUP || + events[i].events & EPOLLERR) { + if (!finished) + pr_perror("Unable to accept more local image connections"); + goto end; + } + // accept + pr_perror("Calling accept %d", i); + handle_local_accept(fd); + } + else { + // TODO - handle write/read + pr_perror("Event on unexpected file descripor"); + goto end; + } + } + } +end: + close(epoll_fd); + close(fd); 
+ free(events); return NULL; } @@ -680,6 +759,7 @@ int64_t recv_image(int fd, struct rimage *rimg, uint64_t size, int flags, bool c while ((ret = recv_image_async(op)) < 0) if (ret != EAGAIN && ret != EWOULDBLOCK) return -1; + free(op); return ret; } diff --git a/criu/include/img-remote.h b/criu/include/img-remote.h index 0947e7f0ca..779a137fcd 100644 --- a/criu/include/img-remote.h +++ b/criu/include/img-remote.h @@ -108,6 +108,7 @@ int64_t write_remote_header(int fd, char *snapshot_id, char *path, int open_mode int setup_TCP_server_socket(int port); int setup_TCP_client_socket(char *hostname, int port); int setup_UNIX_server_socket(char *path); +void socket_set_non_blocking(int fd); /* Called by restore to get the fd correspondent to a particular path. This call * will block until the connection is received. From e9732fa449ecf9c721b09287142e16debf6dd660 Mon Sep 17 00:00:00 2001 From: Rodrigo Bruno Date: Mon, 14 May 2018 01:29:49 +0100 Subject: [PATCH 024/277] remote: Unblocking implementation of img cache and proxy seems to be working. 
--- criu/img-cache.c | 137 +----- criu/img-proxy.c | 65 +-- criu/img-remote.c | 961 +++++++++++++++++++++++--------------- criu/include/img-remote.h | 53 +-- 4 files changed, 629 insertions(+), 587 deletions(-) diff --git a/criu/img-cache.c b/criu/img-cache.c index 7b828b9b63..c941f14e25 100644 --- a/criu/img-cache.c +++ b/criu/img-cache.c @@ -8,105 +8,25 @@ #include #include "cr_options.h" -static struct rimage *wait_for_image(struct wthread *wt) +int accept_proxy_to_cache(int sockfd) { - struct rimage *result; + struct sockaddr_in cli_addr; + socklen_t clilen = sizeof(cli_addr); + int proxy_fd = accept(sockfd, (struct sockaddr *) &cli_addr, &clilen); - if (!strncmp(wt->path, RESTORE_FINISH, sizeof(RESTORE_FINISH))) { - finished = true; - shutdown(local_req_fd, SHUT_RD); - return NULL; - } - - result = get_rimg_by_name(wt->snapshot_id, wt->path); - if (result != NULL && result->size > 0) - return result; - - /* The file does not exist and we do not expect new files */ - if (finished && !is_receiving()) - return NULL; - - /* NOTE: at this point, when the thread wakes up, either the image is - * already in memory or it will never come (the dump is finished). - */ - sem_wait(&(wt->wakeup_sem)); - result = get_rimg_by_name(wt->snapshot_id, wt->path); - if (result != NULL && result->size > 0) - return result; - else - return NULL; -} - -/* The image cache creates a thread that calls this function. It waits for remote - * images from the image-cache. 
- */ -void *accept_remote_image_connections(void *port) -{ - int fd = *((int *) port); - struct sockaddr_in cli_addr; - socklen_t clilen = sizeof(cli_addr); - char snapshot_id_buf[PATHLEN], path_buf[PATHLEN]; - uint64_t size; - int64_t ret; - int flags, proxy_fd; - struct rimage *rimg; - - proxy_fd = accept(fd, (struct sockaddr *) &cli_addr, &clilen); - if (proxy_fd < 0) { - pr_perror("Unable to accept remote image connection from image proxy"); - return NULL; - } - while (1) { - ret = read_remote_header(proxy_fd, snapshot_id_buf, path_buf, &flags, &size); - if (ret < 0) { - pr_perror("Unable to receive remote header from image proxy"); - return NULL; - } - /* This means that the no more images are coming. */ - else if (!ret) { - pr_info("Image Proxy connection closed.\n"); - finished = true; - unlock_workers(); - return NULL; - } - - pr_info("Received %s request for %s:%s\n", - flags == O_RDONLY ? "read" : - flags == O_APPEND ? "append" : "write", - path_buf, snapshot_id_buf); + if (proxy_fd < 0) { + pr_perror("Unable to accept remote image connection from image proxy"); + return -1; + } - rimg = prepare_remote_image(path_buf, snapshot_id_buf, flags); - - prepare_recv_rimg(); - if (!size) - ret = 0; - else - ret = recv_image(proxy_fd, rimg, size, flags, false); - if (ret < 0) { - pr_perror("Unable to receive %s:%s from image proxy", - rimg->path, rimg->snapshot_id); - finalize_recv_rimg(NULL); - return NULL; - } else if (ret != size) { - pr_perror("Unable to receive %s:%s from image proxy (received %ld bytes, expected %lu bytes)", - rimg->path, rimg->snapshot_id, (long)ret, (unsigned long)size); - finalize_recv_rimg(NULL); - return NULL; - } - finalize_recv_rimg(rimg); - - pr_info("Finished receiving %s:%s (received %ld bytes)\n", - rimg->path, rimg->snapshot_id, (long)ret); - } + return proxy_fd; } int image_cache(bool background, char *local_cache_path, unsigned short cache_write_port) { - pthread_t local_req_thr, remote_req_thr; - pr_info("Proxy to Cache Port 
%d, CRIU to Cache Path %s\n", cache_write_port, local_cache_path); - + restoring = true; if (opts.ps_socket != -1) { proxy_to_cache_fd = opts.ps_socket; @@ -117,40 +37,29 @@ int image_cache(bool background, char *local_cache_path, unsigned short cache_wr pr_perror("Unable to open proxy to cache TCP socket"); return -1; } + // Wait to accept connection from proxy. + proxy_to_cache_fd = accept_proxy_to_cache(proxy_to_cache_fd); + if (proxy_to_cache_fd < 0) + return -1; // TODO - should close other sockets. } + pr_info("Cache is connected to Proxy through fd %d\n", proxy_to_cache_fd); + local_req_fd = setup_UNIX_server_socket(local_cache_path); if (local_req_fd < 0) { pr_perror("Unable to open cache to proxy UNIX socket"); - return -1; - } - - socket_set_non_blocking(local_req_fd); + return -1; // TODO - should close other sockets. - if (init_daemon(background, wait_for_image)) { - pr_perror("Unable to initialize daemon"); - return -1; } - if (pthread_create( - &remote_req_thr, - NULL, accept_remote_image_connections, - (void *) &proxy_to_cache_fd)) { - pr_perror("Unable to create remote requests thread"); - return -1; - } - if (pthread_create( - &local_req_thr, - NULL, - accept_local_image_connections, - (void *) &local_req_fd)) { - pr_perror("Unable to create local requests thread"); - return -1; + if (background) { + if (daemon(1, 0) == -1) { + pr_perror("Can't run service server in the background"); + return -1; + } } - pthread_join(remote_req_thr, NULL); - pthread_join(local_req_thr, NULL); - join_workers(); + accept_image_connections(); pr_info("Finished image cache."); return 0; } diff --git a/criu/img-proxy.c b/criu/img-proxy.c index b63d69a033..9551a7dcbc 100644 --- a/criu/img-proxy.c +++ b/criu/img-proxy.c @@ -8,51 +8,11 @@ #include #include "cr_options.h" -static struct rimage *wait_for_image(struct wthread *wt) -{ - return get_rimg_by_name(wt->snapshot_id, wt->path); -} - -int64_t forward_image(struct rimage *rimg) -{ - int64_t ret; - int fd = 
proxy_to_cache_fd; - - pthread_mutex_lock(&(rimg->in_use)); - pr_info("Forwarding %s:%s (%lu bytes)\n", - rimg->path, rimg->snapshot_id, (unsigned long)rimg->size); - if (write_remote_header( - fd, rimg->snapshot_id, rimg->path, O_APPEND, rimg->size) < 0) { - pr_perror("Error writing header for %s:%s", - rimg->path, rimg->snapshot_id); - pthread_mutex_unlock(&(rimg->in_use)); - return -1; - } - - ret = send_image(fd, rimg, O_APPEND, false); - if (ret < 0) { - pr_perror("Unable to send %s:%s to image cache", - rimg->path, rimg->snapshot_id); - pthread_mutex_unlock(&(rimg->in_use)); - return -1; - } else if (ret != rimg->size) { - pr_perror("Unable to send %s:%s to image proxy (sent %ld bytes, expected %lu bytes", - rimg->path, rimg->snapshot_id, (long)ret, (unsigned long)rimg->size); - pthread_mutex_unlock(&(rimg->in_use)); - return -1; - } - pr_info("Finished forwarding %s:%s (sent %lu bytes)\n", - rimg->path, rimg->snapshot_id, (unsigned long)rimg->size); - pthread_mutex_unlock(&(rimg->in_use)); - return ret; -} - int image_proxy(bool background, char *local_proxy_path, char *fwd_host, unsigned short fwd_port) { - pthread_t local_req_thr; - pr_info("CRIU to Proxy Path: %s, Cache Address %s:%hu\n", local_proxy_path, fwd_host, fwd_port); + restoring = false; local_req_fd = setup_UNIX_server_socket(local_proxy_path); if (local_req_fd < 0) { @@ -60,8 +20,6 @@ int image_proxy(bool background, char *local_proxy_path, char *fwd_host, unsigne return -1; } - socket_set_non_blocking(local_req_fd); - if (opts.ps_socket != -1) { proxy_to_cache_fd = opts.ps_socket; pr_info("Re-using ps socket %d\n", proxy_to_cache_fd); @@ -69,24 +27,21 @@ int image_proxy(bool background, char *local_proxy_path, char *fwd_host, unsigne proxy_to_cache_fd = setup_TCP_client_socket(fwd_host, fwd_port); if (proxy_to_cache_fd < 0) { pr_perror("Unable to open proxy to cache TCP socket"); - return -1; + return -1; // TODO - should close other sockets. 
} } - if (init_daemon(background, wait_for_image)) - return -1; + pr_info("Proxy is connected to Cache through fd %d\n", proxy_to_cache_fd); - if (pthread_create( - &local_req_thr, - NULL, - accept_local_image_connections, - (void *) &local_req_fd)) { - pr_perror("Unable to create local requests thread"); - return -1; + if (background) { + if (daemon(1, 0) == -1) { + pr_perror("Can't run service server in the background"); + return -1; + } } - pthread_join(local_req_thr, NULL); - join_workers(); + // TODO - local_req_fd and proxy_to_cache_fd send as args. + accept_image_connections(); pr_info("Finished image proxy."); return 0; } diff --git a/criu/img-remote.c b/criu/img-remote.c index ed8587996f..218c29684b 100644 --- a/criu/img-remote.c +++ b/criu/img-remote.c @@ -27,29 +27,35 @@ #define PB_LOCAL_IMAGE_SIZE PATHLEN #define EPOLL_MAX_EVENTS 50 -static char *snapshot_id; -bool restoring = true; - +// List of images already in memory. LIST_HEAD(rimg_head); -pthread_mutex_t rimg_lock = PTHREAD_MUTEX_INITIALIZER; -pthread_mutex_t proxy_to_cache_lock = PTHREAD_MUTEX_INITIALIZER; +// List of local operations currently in-progess. +LIST_HEAD(rop_inprogress); -LIST_HEAD(workers_head); -pthread_mutex_t workers_lock = PTHREAD_MUTEX_INITIALIZER; -sem_t workers_semph; +// List of local operations pending (reads on the restore side for images that +// still haven't arrived). -struct rimage * (*wait_for_image) (struct wthread *wt); +LIST_HEAD(rop_pending); +// List of images waiting to be forwarded. The head of the list is currently +// being forwarded. +LIST_HEAD(rop_forwarding); + +// List of snapshots (useful when doing incremental restores/dumps +LIST_HEAD(snapshot_head); -bool finished = false; -int writing = 0; -int forwarding = 0; +static char *snapshot_id; +bool restoring = true; // TODO - check where this is used! +// TODO - split this into two vars, recv_from_proxy, send_to_cache +bool forwarding = false; // TODO - true if proxy_to_cache_fd is being used. 
+bool finished_local = false; +bool finished_remote = false; int proxy_to_cache_fd; int local_req_fd; +int epoll_fd; +struct epoll_event *events; -LIST_HEAD(snapshot_head); - /* A snapshot is a dump or pre-dump operation. Each snapshot is identified by an * ID which corresponds to the working directory specified by the user. */ @@ -79,125 +85,27 @@ struct rimage *get_rimg_by_name(const char *snapshot_id, const char *path) { struct rimage *rimg = NULL; - pthread_mutex_lock(&rimg_lock); list_for_each_entry(rimg, &rimg_head, l) { if (!strncmp(rimg->path, path, PATHLEN) && !strncmp(rimg->snapshot_id, snapshot_id, PATHLEN)) { - pthread_mutex_unlock(&rimg_lock); return rimg; } } - pthread_mutex_unlock(&rimg_lock); - return NULL; -} - -static struct wthread *get_wt_by_name(const char *snapshot_id, const char *path) -{ - struct wthread *wt = NULL; - - pthread_mutex_lock(&workers_lock); - list_for_each_entry(wt, &workers_head, l) { - if (!strncmp(wt->path, path, PATHLEN) && - !strncmp(wt->snapshot_id, snapshot_id, PATHLEN)) { - pthread_mutex_unlock(&workers_lock); - return wt; - } - } - pthread_mutex_unlock(&workers_lock); return NULL; } -static int init_sync_structures(void) -{ - if (sem_init(&workers_semph, 0, 0) != 0) { - pr_perror("Workers semaphore init failed"); - return -1; - } - - return 0; -} - -void prepare_recv_rimg(void) -{ - pthread_mutex_lock(&rimg_lock); - writing++; - pthread_mutex_unlock(&rimg_lock); -} - -void finalize_recv_rimg(struct rimage *rimg) -{ - - pthread_mutex_lock(&rimg_lock); - - if (rimg) - list_add_tail(&(rimg->l), &rimg_head); - writing--; - pthread_mutex_unlock(&rimg_lock); - /* Wake thread waiting for this image. 
*/ - if (rimg) { - struct wthread *wt = get_wt_by_name(rimg->snapshot_id, rimg->path); - if (wt) - sem_post(&(wt->wakeup_sem)); - } -} - -bool is_receiving(void) -{ - int ret; - - pthread_mutex_lock(&rimg_lock); - ret = writing; - pthread_mutex_unlock(&rimg_lock); - return ret > 0; -} - -static void prepare_fwd_rimg(void) -{ - pthread_mutex_lock(&rimg_lock); - forwarding++; - pthread_mutex_unlock(&rimg_lock); -} - -static void finalize_fwd_rimg(void) -{ - pthread_mutex_lock(&rimg_lock); - forwarding--; - pthread_mutex_unlock(&rimg_lock); -} - -static bool is_forwarding(void) -{ - int ret; - - pthread_mutex_lock(&rimg_lock); - ret = forwarding; - pthread_mutex_unlock(&rimg_lock); - return ret > 0; -} - -/* This function is called when no more images are coming. Threads still waiting - * for images will be awaken to send a ENOENT (no such file) to the requester. - */ -void unlock_workers(void) +struct roperation *get_rop_by_name( + struct list_head *head, const char *snapshot_id, const char *path) { - struct wthread *wt = NULL; - - pthread_mutex_lock(&workers_lock); - list_for_each_entry(wt, &workers_head, l) - sem_post(&(wt->wakeup_sem)); - pthread_mutex_unlock(&workers_lock); -} + struct roperation *rop = NULL; -int init_daemon(bool background, struct rimage *(*wfi)(struct wthread*)) -{ - if (background) { - if (daemon(1, 0) == -1) { - pr_perror("Can't run service server in the background"); - return -1; + list_for_each_entry(rop, head, l) { + if (!strncmp(rop->path, path, PATHLEN) && + !strncmp(rop->snapshot_id, snapshot_id, PATHLEN)) { + return rop; } } - wait_for_image = wfi; - return init_sync_structures(); + return NULL; } int setup_TCP_server_socket(int port) @@ -278,11 +186,15 @@ int setup_TCP_client_socket(char *hostname, int port) int event_set(int epoll_fd, int op, int fd, uint32_t events, void *data) { + int ret; struct epoll_event event; event.events = events; event.data.ptr = data; - // TODO - check if this is okay to send a stack allocated object! 
- return epoll_ctl(epoll_fd, op, fd, &event); + + ret = epoll_ctl(epoll_fd, op, fd, &event); + if (ret) + pr_perror("[fd=%d] Unable to set event", fd); + return ret; } void socket_set_non_blocking(int fd) @@ -299,6 +211,20 @@ void socket_set_non_blocking(int fd) pr_perror("Failed to set flags for fd %d", fd); } +void socket_set_blocking(int fd) +{ + int flags = fcntl(fd, F_GETFL, NULL); + + if (flags < 0) { + pr_perror("Failed to obtain flags from fd %d", fd); + return; + } + flags &= (~O_NONBLOCK); + + if (fcntl(fd, F_SETFL, flags) < 0) + pr_perror("Failed to set flags for fd %d", fd); +} + int setup_UNIX_server_socket(char *path) { struct sockaddr_un addr; @@ -441,48 +367,10 @@ int64_t read_remote_header(int fd, char *snapshot_id, char *path, int *flags, ui return ret; } -static struct wthread *new_worker(void) -{ - struct wthread *wt = malloc(sizeof(struct wthread)); - - if (!wt) { - pr_perror("Unable to allocate worker thread structure"); - goto err; - } - if (sem_init(&(wt->wakeup_sem), 0, 0) != 0) { - pr_perror("Workers semaphore init failed"); - goto err; - } - return wt; -err: - free(wt); - return NULL; -} - -static void add_worker(struct wthread *wt) -{ - pthread_mutex_lock(&workers_lock); - list_add_tail(&(wt->l), &workers_head); - pthread_mutex_unlock(&workers_lock); - sem_post(&workers_semph); -} - -void join_workers(void) -{ - struct wthread *wthread = NULL; - - while (! 
list_empty(&workers_head)) { - wthread = list_entry(workers_head.next, struct wthread, l); - pthread_join(wthread->tid, NULL); - list_del(&(wthread->l)); - free(wthread); - } -} - static struct rimage *new_remote_image(char *path, char *snapshot_id) { - struct rimage *rimg = malloc(sizeof(struct rimage)); - struct rbuf *buf = malloc(sizeof(struct rbuf)); + struct rimage *rimg = calloc(1, sizeof(struct rimage)); + struct rbuf *buf = calloc(1, sizeof(struct rbuf)); if (rimg == NULL || buf == NULL) { pr_perror("Unable to allocate remote image structures"); @@ -490,18 +378,13 @@ static struct rimage *new_remote_image(char *path, char *snapshot_id) } strncpy(rimg->path, path, PATHLEN -1 ); - rimg->path[PATHLEN - 1] = '\0'; strncpy(rimg->snapshot_id, snapshot_id, PATHLEN - 1); + rimg->path[PATHLEN - 1] = '\0'; rimg->snapshot_id[PATHLEN - 1] = '\0'; - rimg->size = 0; - buf->nbytes = 0; INIT_LIST_HEAD(&(rimg->buf_head)); list_add_tail(&(buf->l), &(rimg->buf_head)); + rimg->curr_fwd_buf = buf; - if (pthread_mutex_init(&(rimg->in_use), NULL) != 0) { - pr_err("Remote image in_use mutex init failed\n"); - goto err; - } return rimg; err: free(rimg); @@ -509,11 +392,56 @@ static struct rimage *new_remote_image(char *path, char *snapshot_id) return NULL; } +static struct roperation *new_remote_operation( + char *path, char *snapshot_id, int cli_fd, int flags, bool close_fd) +{ + struct roperation *rop = calloc(1, sizeof(struct roperation)); + + if (rop == NULL) { + pr_perror("Unable to allocate remote operation structures"); + return NULL; + } + strncpy(rop->path, path, PATHLEN -1 ); + strncpy(rop->snapshot_id, snapshot_id, PATHLEN - 1); + rop->path[PATHLEN - 1] = '\0'; + rop->snapshot_id[PATHLEN - 1] = '\0'; + rop->fd = cli_fd; + rop->flags = flags; + rop->close_fd = close_fd; + + return rop; +} + +static void rop_set_rimg(struct roperation* rop, struct rimage* rimg) +{ + rop->rimg = rimg; + rop->size = rimg->size; + if (rop->flags == O_APPEND) { + // Image forward on append 
must start where the last fwd finished. + if (rop->fd == proxy_to_cache_fd) { + rop->curr_sent_buf = rimg->curr_fwd_buf; + rop->curr_sent_bytes = rimg->curr_fwd_bytes; + } + // For local appends, just write at the end. + else { + rop->curr_sent_buf = list_entry(rimg->buf_head.prev, struct rbuf, l); + rop->curr_sent_bytes = rop->curr_sent_buf->nbytes; + } + // On the receiver size, we just append + rop->curr_recv_buf = list_entry(rimg->buf_head.prev, struct rbuf, l); + } + else { + // Writes or reads are simple. Just do it from the beginnig. + rop->curr_recv_buf = list_entry(rimg->buf_head.next, struct rbuf, l); + rop->curr_sent_buf = list_entry(rimg->buf_head.next, struct rbuf, l); + rop->curr_sent_bytes = 0; + + } +} + /* Clears a remote image struct for reusing it. */ static struct rimage *clear_remote_image(struct rimage *rimg) { - pthread_mutex_lock(&(rimg->in_use)); - while (!list_is_singular(&(rimg->buf_head))) { struct rbuf *buf = list_entry(rimg->buf_head.prev, struct rbuf, l); @@ -524,179 +452,428 @@ static struct rimage *clear_remote_image(struct rimage *rimg) list_entry(rimg->buf_head.next, struct rbuf, l)->nbytes = 0; rimg->size = 0; - pthread_mutex_unlock(&(rimg->in_use)); - return rimg; } -struct rimage *prepare_remote_image(char *path, char *snapshot_id, int open_mode) +void handle_accept_write( + int cli_fd, char* snapshot_id, char* path, int flags, bool close_fd, uint64_t size) { + struct roperation *rop = NULL; struct rimage *rimg = get_rimg_by_name(snapshot_id, path); - /* There is no record of such image, create a new one. 
*/ - if (rimg == NULL) - return new_remote_image(path, snapshot_id); - - pthread_mutex_lock(&rimg_lock); - list_del(&(rimg->l)); - pthread_mutex_unlock(&rimg_lock); + if (rimg == NULL) { + rimg = new_remote_image(path, snapshot_id); + if (rimg == NULL) { + pr_perror("Error preparing remote image"); + goto err; + } + } + else { + list_del(&(rimg->l)); + if (flags == O_APPEND) + clear_remote_image(rimg); + } + + rop = new_remote_operation(path, snapshot_id, cli_fd, flags, close_fd); + if (rop == NULL) { + pr_perror("Error preparing remote operation"); + goto err; + } + + rop_set_rimg(rop, rimg); + rop->size = size; + list_add_tail(&(rop->l), &rop_inprogress); + event_set(epoll_fd, EPOLL_CTL_ADD, rop->fd, EPOLLIN, rop); + return; +err: + free(rimg); + free(rop); +} - /* There is already an image record. Simply return it for appending. */ - if (open_mode == O_APPEND) - return rimg; - /* There is already an image record. Clear it for writing. */ - else - return clear_remote_image(rimg); +void handle_accept_proxy_write( + int cli_fd, char* snapshot_id, char* path, int flags) +{ + handle_accept_write(cli_fd, snapshot_id, path, flags, true, 0); } -static void *process_local_read(struct wthread *wt) +void handle_accept_proxy_read( + int cli_fd, char* snapshot_id, char* path, int flags) { + struct roperation *rop = NULL; struct rimage *rimg = NULL; - int64_t ret; - /* TODO - split wait_for_image - * in cache - improve the parent stuff - * in proxy - do not wait for anything, return no file - */ - rimg = wait_for_image(wt); - if (!rimg) { - pr_info("No image %s:%s.\n", wt->path, wt->snapshot_id); - if (write_reply_header(wt->fd, ENOENT) < 0) + + rimg = get_rimg_by_name(snapshot_id, path); + + // Check if we already have the image. 
+ if (rimg == NULL) { + pr_info("No image %s:%s.\n", path, snapshot_id); + if (write_reply_header(cli_fd, ENOENT) < 0) { pr_perror("Error writing reply header for unexisting image"); - close(wt->fd); - return NULL; - } else { - if (write_reply_header(wt->fd, 0) < 0) { + goto err; + } + } + else { + if (write_reply_header(cli_fd, 0) < 0) { pr_perror("Error writing reply header for %s:%s", - wt->path, wt->snapshot_id); - close(wt->fd); - return NULL; + path, snapshot_id); + goto err; } - } - pthread_mutex_lock(&(rimg->in_use)); - ret = send_image(wt->fd, rimg, wt->flags, true); - if (ret < 0) - pr_perror("Unable to send %s:%s to CRIU (sent %ld bytes)", - rimg->path, rimg->snapshot_id, (long)ret); - else - pr_info("Finished sending %s:%s to CRIU (sent %ld bytes)\n", - rimg->path, rimg->snapshot_id, (long)ret); - pthread_mutex_unlock(&(rimg->in_use)); - return NULL; + rop = new_remote_operation(path, snapshot_id, cli_fd, flags, true); + if (rop == NULL) { + pr_perror("Error preparing remote operation"); + goto err; + } + rop_set_rimg(rop, rimg); + list_add_tail(&(rop->l), &rop_inprogress); + event_set(epoll_fd, EPOLL_CTL_ADD, rop->fd, EPOLLOUT, rop); + } + return; +err: + close(cli_fd); } -static void *process_local_image_connection(void *ptr) +void finish_local() { - struct wthread *wt = (struct wthread *) ptr; - struct rimage *rimg = NULL; - int64_t ret; + int ret; + finished_local = true; + //shutdown(local_req_fd, SHUT_RD); //TODO - should this be removed? + ret = event_set(epoll_fd, EPOLL_CTL_DEL, local_req_fd, 0, 0); + if (ret) { + pr_perror("Failed to del local fd from epoll"); + } +} - /* NOTE: the code inside this if is shared for both cache and proxy. */ - if (wt->flags == O_RDONLY) - return process_local_read(wt); +void handle_accept_cache_read( + int cli_fd, char* snapshot_id, char* path, int flags) +{ + struct rimage *rimg = NULL; + struct roperation *rop = NULL; - /* NOTE: IMAGE PROXY ONLY. 
The image cache receives write connections - * through TCP (see accept_remote_image_connections). - */ - rimg = prepare_remote_image(wt->path, wt->snapshot_id, wt->flags); - ret = recv_image(wt->fd, rimg, 0, wt->flags, true); - if (ret < 0) { - pr_perror("Unable to receive %s:%s to CRIU (received %ld bytes)", - rimg->path, rimg->snapshot_id, (long)ret); - finalize_recv_rimg(NULL); - return NULL; + // Check if this is the restore finish message. + if (!strncmp(path, RESTORE_FINISH, sizeof(RESTORE_FINISH))) { + close(cli_fd); + finish_local(); + return; } - finalize_recv_rimg(rimg); - pr_info("Finished receiving %s:%s (received %ld bytes)\n", - rimg->path, rimg->snapshot_id, (long)ret); + rop = new_remote_operation(path, snapshot_id, cli_fd, flags, true); + if (rop == NULL) { + pr_perror("Error preparing remote operation"); + close(cli_fd); + return; + } - if (!strncmp(rimg->path, DUMP_FINISH, sizeof(DUMP_FINISH))) { - finished = true; - shutdown(local_req_fd, SHUT_RD); - } else { - pthread_mutex_lock(&proxy_to_cache_lock); - ret = forward_image(rimg); - pthread_mutex_unlock(&proxy_to_cache_lock); + // Check if we already have the image. + rimg = get_rimg_by_name(snapshot_id, path); + if (rimg != NULL && rimg->size > 0) { + if (write_reply_header(cli_fd, 0) < 0) { + pr_perror("Error writing reply header for %s:%s", + path, snapshot_id); + free(rop); + close(rop->fd); + } + rop_set_rimg(rop, rimg); + list_add_tail(&(rop->l), &rop_inprogress); + event_set(epoll_fd, EPOLL_CTL_ADD, rop->fd, EPOLLOUT, rop); } + // The file may exist in future. + else if (!finished_remote){ + list_add_tail(&(rop->l), &rop_pending); + } + // The file does not exist. 
+ else { + pr_info("No image %s:%s.\n", path, snapshot_id); + if (write_reply_header(cli_fd, ENOENT) < 0) + pr_perror("Error writing reply header for unexisting image"); + free(rop); + close(cli_fd); + } +} - finalize_fwd_rimg(); - if (ret < 0) { - pr_perror("Unable to forward %s:%s to Image Cache", - rimg->path, rimg->snapshot_id); +void forward_remote_image(struct roperation* rop) +{ + uint64_t ret = 0; + // Set blocking during the setup. +// socket_set_blocking(rop->fd); // TODO - test - return NULL; + ret = write_remote_header( + rop->fd, rop->snapshot_id, rop->path, rop->flags, rop->size); + + if (ret < 0) { + pr_perror("Error writing header for %s:%s", + rop->path, rop->snapshot_id); + return; } - if (finished && !is_forwarding() && !is_receiving()) { - pr_info("Closing connection to Image Cache.\n"); - close(proxy_to_cache_fd); - unlock_workers(); + pr_info("[fd=%d] Fowarding %s request for %s:%s (%lu bytes\n", + rop->fd, + rop->flags == O_RDONLY ? "read" : + rop->flags == O_APPEND ? "append" : "write", + rop->path, rop->snapshot_id, rop->size); + + + // Go back to non-blocking +// socket_set_non_blocking(rop->fd); // TODO - test + + forwarding = true; + event_set(epoll_fd, EPOLL_CTL_ADD, rop->fd, EPOLLOUT, rop); +} + +void handle_remote_accept(int fd) +{ + char path[PATHLEN]; + char snapshot_id[PATHLEN]; + int flags; + uint64_t size = 0; + uint64_t ret; + + // Set blocking during the setup. +// socket_set_blocking(fd); // TODO - test! + + ret = read_remote_header(fd, snapshot_id, path, &flags, &size); + if (ret < 0) { + pr_perror("Unable to receive remote header from image proxy"); + goto err; } - return NULL; + /* This means that the no more images are coming. */ + else if (!ret) { + finished_remote = true; + pr_info("Image Proxy connection closed.\n"); + return; + } + + // Go back to non-blocking +// socket_set_non_blocking(fd); // TODO - test! + + pr_info("[fd=%d] Received %s request for %s:%s with %lu bytes\n", + fd, + flags == O_RDONLY ? 
"read" : + flags == O_APPEND ? "append" : "write", + path, snapshot_id, size); + + + forwarding = true; + handle_accept_write(fd, snapshot_id, path, flags, false, size); + return; +err: + close(fd); } void handle_local_accept(int fd) { - struct wthread *wt = NULL; int cli_fd; - pthread_t tid; + char path[PATHLEN]; + char snapshot_id[PATHLEN]; + int flags = 0; struct sockaddr_in cli_addr; socklen_t clilen = sizeof(cli_addr); cli_fd = accept(fd, (struct sockaddr *) &cli_addr, &clilen); if (cli_fd < 0) { pr_perror("Unable to accept local image connection"); - goto err; + return; } - wt = new_worker(); - wt->fd = cli_fd; - - if (read_header(wt->fd, wt->snapshot_id, wt->path, &(wt->flags)) < 0) { + if (read_header(cli_fd, snapshot_id, path, &flags) < 0) { pr_err("Error reading local image header\n"); goto err; } - /* These function calls are used to avoid other threads from - * thinking that there are no more images are coming. - */ - if (wt->flags != O_RDONLY) { - prepare_recv_rimg(); - prepare_fwd_rimg(); + pr_info("[fd=%d] Received %s request for %s:%s\n", + cli_fd, + flags == O_RDONLY ? "read" : + flags == O_APPEND ? "append" : "write", + path, snapshot_id); + + // Write/Append case (only possible in img-proxy). + if (flags != O_RDONLY) { + handle_accept_proxy_write(cli_fd, snapshot_id, path, flags); + } + // Read case while restoring (img-cache). + else if (restoring) { + handle_accept_cache_read(cli_fd, snapshot_id, path, flags); + } + // Read case while dumping (img-proxy). + else { + handle_accept_proxy_read(cli_fd, snapshot_id, path, flags); } - pr_info("Received %s request for %s:%s\n", - wt->flags == O_RDONLY ? "read" : - wt->flags == O_APPEND ? "append" : "write", - wt->path, wt->snapshot_id); + // Set socket non-blocking. 
+ socket_set_non_blocking(cli_fd); + return; +err: + close(cli_fd); +} - if (pthread_create( - &tid, NULL, process_local_image_connection, (void *) wt)) { - pr_perror("Unable to create worker thread"); - goto err; +void finish_proxy_read(struct roperation* rop) +{ + // If finished forwarding image + if (rop->fd == proxy_to_cache_fd) { + // Update fwd buffer and byte count on rimg. + rop->rimg->curr_fwd_buf = rop->curr_sent_buf; + rop->rimg->curr_fwd_bytes = rop->curr_sent_bytes; + + forwarding = false; + + // If there are images waiting to be forwarded, forward the next. + if (!list_empty(&rop_forwarding)) { + forward_remote_image(list_entry(rop_forwarding.next, struct roperation, l)); + } + } +} + +void finish_proxy_write(struct roperation* rop) +{ + // No more local images are comming. Close local socket. + if (!strncmp(rop->path, DUMP_FINISH, sizeof(DUMP_FINISH))) { + // TODO - couldn't we handle the DUMP_FINISH in inside handle_accept_proxy_write? + finish_local(); + } + // Normal image received, forward it. + else { + struct roperation *rop_to_forward = new_remote_operation( + rop->path, rop->snapshot_id, proxy_to_cache_fd, rop->flags, false); + + // Add image to list of images. + list_add_tail(&(rop->rimg->l), &rimg_head); + + rop_set_rimg(rop_to_forward, rop->rimg); + if (list_empty(&rop_forwarding)) { + forward_remote_image(rop_to_forward); + } + list_add_tail(&(rop_to_forward->l), &rop_forwarding); + } +} + +void finish_cache_write(struct roperation* rop) +{ + struct roperation *prop = get_rop_by_name( + &rop_pending, rop->snapshot_id, rop->path); + + forwarding = false; + event_set(epoll_fd, EPOLL_CTL_ADD, proxy_to_cache_fd, EPOLLIN, &proxy_to_cache_fd); + + // Add image to list of images. + list_add_tail(&(rop->rimg->l), &rimg_head); + + // TODO - what if we have multiple requests for the same name? + if (prop != NULL) { + pr_info("\t[fd=%d] Resuming pending %s for %s:%s\n", + prop->fd, + prop->flags == O_APPEND ? + "append" : prop->flags == O_RDONLY ? 
+ "read" : "write", + prop->snapshot_id, prop->path); + + // Write header for pending image. + if (write_reply_header(prop->fd, 0) < 0) { + pr_perror("Error writing reply header for %s:%s", + prop->path, prop->snapshot_id); + close(prop->fd); + free(prop); + return; + } + + rop_set_rimg(prop, rop->rimg); + list_del(&(prop->l)); + list_add_tail(&(prop->l), &rop_inprogress); + event_set(epoll_fd, EPOLL_CTL_ADD, prop->fd, EPOLLOUT, prop); + } +} + +void handle_roperation(struct epoll_event *event, struct roperation *rop) +{ + int64_t ret = (EPOLLOUT & event->events) ? + send_image_async(rop) : + recv_image_async(rop); + + if (ret > 0 || ret == EAGAIN || ret == EWOULDBLOCK) { + event_set( + epoll_fd, + EPOLL_CTL_ADD, + rop->fd, + event->events, + rop); + return; } - wt->tid = tid; - add_worker(wt); - return; + + // Remove rop from list (either in progress or forwarding). + list_del(&(rop->l)); + + // Operation is finished. + if (ret < 0) { + pr_perror("Unable to %s %s:%s (returned %ld)", + event->events & EPOLLOUT ? "send" : "receive", + rop->rimg->path, rop->rimg->snapshot_id, ret); + goto err; + } else { + pr_info("[fd=%d] Finished %s %s:%s to CRIU (size %ld)\n", + rop->fd, + event->events & EPOLLOUT ? "sending" : "receiving", + rop->rimg->path, rop->rimg->snapshot_id, rop->rimg->size); + } + + // If receive operation is finished + if (event->events & EPOLLIN) { + + // Cached side (finished receiving forwarded image) + if (restoring) { + finish_cache_write(rop); + } + // Proxy side (finished receiving local image) + else { + finish_proxy_write(rop); + } + } + // If send operation if finished + else { + // Proxy side (Finished forwarding image or reading it locally). + if (!restoring) + finish_proxy_read(rop); + // Nothing to be done when a read is finished on the cache side. 
+ } err: - close(cli_fd); - free(wt); + free(rop); } +void check_pending_forwards() +{ + struct roperation *rop = NULL; + struct rimage *rimg = NULL; + + list_for_each_entry(rop, &rop_forwarding, l) { + rimg = get_rimg_by_name(rop->snapshot_id, rop->path); + if (rimg != NULL) { + rop_set_rimg(rop, rimg); + forward_remote_image(rop); + return; + } + } +} -void *accept_local_image_connections(void *port) +void check_pending_reads() { - int fd = *((int *) port); - int epoll_fd; - struct epoll_event *events; + struct roperation *rop = NULL; + struct rimage *rimg = NULL; + + list_for_each_entry(rop, &rop_pending, l) { + rimg = get_rimg_by_name(rop->snapshot_id, rop->path); + if (rimg != NULL) { + rop_set_rimg(rop, rimg); + event_set(epoll_fd, EPOLL_CTL_ADD, rop->fd, EPOLLOUT, rop); + } + } +} + +void accept_image_connections() { int ret; epoll_fd = epoll_create(EPOLL_MAX_EVENTS); if (epoll_fd < 0) { pr_perror("Unable to open epoll"); - return NULL; + return; } events = calloc(EPOLL_MAX_EVENTS, sizeof(struct epoll_event)); @@ -705,57 +882,89 @@ void *accept_local_image_connections(void *port) goto end; } - ret = event_set(epoll_fd, EPOLL_CTL_ADD, fd, EPOLLIN, &fd); + ret = event_set(epoll_fd, EPOLL_CTL_ADD, local_req_fd, EPOLLIN, &local_req_fd); if (ret) { - pr_perror("Failed to set event for epoll"); + pr_perror("Failed to add local fd to epoll"); goto end; } + // Only if we are restoring (cache-side) we need to add the remote sock to + // the epoll. 
+ if (restoring) { + ret = event_set(epoll_fd, EPOLL_CTL_ADD, proxy_to_cache_fd, + EPOLLIN, &proxy_to_cache_fd); + if (ret) { + pr_perror("Failed to add proxy to cache fd to epoll"); + goto end; + } + } + while (1) { - int n_events = epoll_wait(epoll_fd, events, EPOLL_MAX_EVENTS, -1); + int n_events = epoll_wait(epoll_fd, events, EPOLL_MAX_EVENTS, 250); if (n_events < 0) { pr_perror("Failed to epoll wait"); goto end; } for (int i = 0; i < n_events; i++) { - if (events[i].data.ptr == &fd) { + // Accept from local dump/restore? + if (events[i].data.ptr == &local_req_fd) { if ( events[i].events & EPOLLHUP || events[i].events & EPOLLERR) { - if (!finished) + if (!finished_local) pr_perror("Unable to accept more local image connections"); goto end; } - // accept - pr_perror("Calling accept %d", i); - handle_local_accept(fd); + handle_local_accept(local_req_fd); } + else if (restoring && !forwarding && events[i].data.ptr == &proxy_to_cache_fd) { + event_set(epoll_fd, EPOLL_CTL_DEL, proxy_to_cache_fd, 0, 0); + handle_remote_accept(proxy_to_cache_fd); + } else { - // TODO - handle write/read - pr_perror("Event on unexpected file descripor"); - goto end; + struct roperation *rop = + (struct roperation*)events[i].data.ptr; + event_set(epoll_fd, EPOLL_CTL_DEL, rop->fd, 0, 0); + handle_roperation(&events[i], rop); } } - } + + // Check if there are any pending operations + if (restoring) + check_pending_reads(); + else if (!forwarding) + check_pending_forwards(); + + // Check if we can close the tcp socket (this will unblock the cache + // to answer "no image" to restore). + if (!restoring && + finished_local && + !finished_remote && + list_empty(&rop_forwarding)) { + close(proxy_to_cache_fd); + finished_remote = true; + } + + // If both local and remote sockets are closed, leave. + if (finished_local && finished_remote) { + pr_info("\tFinished both local and remote, exiting\n"); + goto end; + } + } end: + // TODO - release pending when no receiving and finished. 
close(epoll_fd); - close(fd); + close(local_req_fd); free(events); - return NULL; } int64_t recv_image(int fd, struct rimage *rimg, uint64_t size, int flags, bool close_fd) { int ret; - struct roperation *op = malloc(sizeof(struct roperation)); - bzero(op, sizeof(struct roperation)); - op->fd = fd; - op->rimg = rimg; - op->size = size; - op->flags = flags; - op->close_fd = close_fd; - op->curr_recv_buf = list_entry(rimg->buf_head.next, struct rbuf, l); + struct roperation *op = new_remote_operation( + rimg->path, rimg->snapshot_id, fd, flags, close_fd); + rop_set_rimg(op, rimg); while ((ret = recv_image_async(op)) < 0) if (ret != EAGAIN && ret != EWOULDBLOCK) return -1; @@ -771,70 +980,58 @@ int64_t recv_image_async(struct roperation *op) int fd = op->fd; struct rimage *rimg = op->rimg; uint64_t size = op->size; - int flags = op->flags; bool close_fd = op->close_fd; struct rbuf *curr_buf = op->curr_recv_buf; int n; - if (curr_buf == NULL) { - if (flags == O_APPEND) - curr_buf = list_entry(rimg->buf_head.prev, struct rbuf, l); - else - curr_buf = list_entry(rimg->buf_head.next, struct rbuf, l); - } - - while (1) { - n = read(fd, + n = read(fd, curr_buf->buffer + curr_buf->nbytes, size ? 
min((int) (size - rimg->size), BUF_SIZE - curr_buf->nbytes) : BUF_SIZE - curr_buf->nbytes); - if (n == 0) { - if (close_fd) - close(fd); - return rimg->size; - } else if (n > 0) { - curr_buf->nbytes += n; - rimg->size += n; - if (curr_buf->nbytes == BUF_SIZE) { - struct rbuf *buf = malloc(sizeof(struct rbuf)); - if (buf == NULL) { - pr_perror("Unable to allocate remote_buffer structures"); - if (close_fd) - close(fd); - return -1; - } - buf->nbytes = 0; - list_add_tail(&(buf->l), &(rimg->buf_head)); - curr_buf = buf; - } - if (size && rimg->size == size) { + if (n == 0) { + if (close_fd) + close(fd); + return n; + } else if (n > 0) { + curr_buf->nbytes += n; + rimg->size += n; + if (curr_buf->nbytes == BUF_SIZE) { + struct rbuf *buf = malloc(sizeof(struct rbuf)); + if (buf == NULL) { + pr_perror("Unable to allocate remote_buffer structures"); if (close_fd) close(fd); - return rimg->size; + return -1; } - } else if (errno == EAGAIN || errno == EWOULDBLOCK) { - return errno; - } else { - pr_perror("Read on %s:%s socket failed", - rimg->path, rimg->snapshot_id); + buf->nbytes = 0; + list_add_tail(&(buf->l), &(rimg->buf_head)); + op->curr_recv_buf = buf; + return n; + } + if (size && rimg->size == size) { if (close_fd) close(fd); - return -1; + return 0; } + } else if (errno == EAGAIN || errno == EWOULDBLOCK) { + return errno; + } else { + pr_perror("Read for %s:%s socket on fd=%d failed", + rimg->path, rimg->snapshot_id, fd); + if (close_fd) + close(fd); + return -1; } + return n; } int64_t send_image(int fd, struct rimage *rimg, int flags, bool close_fd) { int ret; - struct roperation *op = malloc(sizeof(struct roperation)); - bzero(op, sizeof(struct roperation)); - op->fd = fd; - op->rimg = rimg; - op->flags = flags; - op->close_fd = close_fd; - op->curr_sent_buf = list_entry(rimg->buf_head.next, struct rbuf, l); + struct roperation *op = new_remote_operation( + rimg->path, rimg->snapshot_id, fd, flags, close_fd); + rop_set_rimg(op, rimg); while ((ret = 
send_image_async(op)) < 0) if (ret != EAGAIN && ret != EWOULDBLOCK) return -1; @@ -845,47 +1042,43 @@ int64_t send_image_async(struct roperation *op) { int fd = op->fd; struct rimage *rimg = op->rimg; - int flags = op->flags; bool close_fd = op->close_fd; int n; - if (flags != O_APPEND) { - op->curr_sent_buf = list_entry(rimg->buf_head.next, struct rbuf, l); - op->curr_sent_bytes = 0; - } - - while (1) { - n = send( - fd, - op->curr_sent_buf->buffer + op->curr_sent_bytes, - min(BUF_SIZE, op->curr_sent_buf->nbytes) - op->curr_sent_bytes, - MSG_NOSIGNAL); - if (n > -1) { - op->curr_sent_bytes += n; - if (op->curr_sent_bytes == BUF_SIZE) { - op->curr_sent_buf = - list_entry(op->curr_sent_buf->l.next, struct rbuf, l); - op->nblocks++; - op->curr_sent_bytes = 0; - } else if (op->curr_sent_bytes == op->curr_sent_buf->nbytes) { - if (close_fd) - close(fd); - return op->nblocks*BUF_SIZE + op->curr_sent_buf->nbytes; - } - } else if (errno == EPIPE || errno == ECONNRESET) { - pr_warn("Connection for %s:%s was closed early than expected\n", - rimg->path, rimg->snapshot_id); - return 0; - } else if (errno == EAGAIN || errno == EWOULDBLOCK) { - return errno; + n = write( + fd, + op->curr_sent_buf->buffer + op->curr_sent_bytes, + min(BUF_SIZE, op->curr_sent_buf->nbytes) - op->curr_sent_bytes); + + if (n > -1) { + op->curr_sent_bytes += n; + if (op->curr_sent_bytes == BUF_SIZE) { + op->curr_sent_buf = + list_entry(op->curr_sent_buf->l.next, struct rbuf, l); + op->curr_sent_bytes = 0; + return n; } - else { - pr_perror("Write on %s:%s socket failed", - rimg->path, rimg->snapshot_id); - return -1; + // TODO - cloudn't we just compare to the img size? + else if (op->curr_sent_bytes == op->curr_sent_buf->nbytes) { + if (close_fd) + close(fd); + return 0; } + return n; + } + // TODO - clouldn't these checks be made upstream? 
+ else if (errno == EPIPE || errno == ECONNRESET) { + pr_warn("Connection for %s:%s was closed early than expected\n", + rimg->path, rimg->snapshot_id); + return 0; + } else if (errno == EAGAIN || errno == EWOULDBLOCK) { + return errno; + } + else { + pr_perror("Write on %s:%s socket failed", + rimg->path, rimg->snapshot_id); + return -1; } - } int read_remote_image_connection(char *snapshot_id, char *path) diff --git a/criu/include/img-remote.h b/criu/include/img-remote.h index 779a137fcd..029857f70e 100644 --- a/criu/include/img-remote.h +++ b/criu/include/img-remote.h @@ -32,32 +32,31 @@ struct rbuf { }; struct rimage { + /* Path and snapshot id identify the image. */ char path[PATHLEN]; char snapshot_id[PATHLEN]; + /* List anchor. */ struct list_head l; + /* List of buffers that compose the image. */ struct list_head buf_head; - uint64_t size; /* number of bytes */ - pthread_mutex_t in_use; /* Only one operation at a time, per image. */ -}; - -struct wthread { - pthread_t tid; - struct list_head l; - /* Client fd. */ - int fd; - /* The path and snapshot_id identify the request handled by this thread. */ - char path[PATHLEN]; - char snapshot_id[PATHLEN]; - int flags; - /* This semph is used to wake this thread if the image is in memory.*/ - sem_t wakeup_sem; + /* Number of bytes. */ + uint64_t size; + /* Note: forward (send) operation only. Buffer to start forwarding. */ + struct rbuf *curr_fwd_buf; + /* Note: forward (send) operation only. Number of fwd bytes in 'curr_fw_buf'. */ + uint64_t curr_fwd_bytes; }; /* Structure that describes the state of a remote operation on remote images. */ struct roperation { + /* List anchor. */ + struct list_head l; /* File descriptor being used. */ int fd; - /* Remote image being used. */ + /* Path and snapshot id identify the required image. */ + char path[PATHLEN]; + char snapshot_id[PATHLEN]; + /* Remote image being used (may be null if the operation is pending). */ struct rimage *rimg; /* Flags for the operation. 
*/ int flags; @@ -66,37 +65,23 @@ struct roperation { /* Note: recv operation only. How much bytes should be received. */ uint64_t size; /* Note: recv operation only. Buffer being writen. */ - struct rbuf *curr_recv_buf; - /* Note: send operation only. Number of blocks already sent. */ - int nblocks; + struct rbuf *curr_recv_buf; // TODO - needed? Could be replaced by list.last! /* Note: send operation only. Pointer to buffer being sent. */ struct rbuf *curr_sent_buf; /* Note: send operation only. Number of bytes sent in 'curr_send_buf. */ uint64_t curr_sent_bytes; }; -/* This variable is used to indicate when the dump is finished. */ -extern bool finished; /* This is the proxy to cache TCP socket FD. */ extern int proxy_to_cache_fd; /* This the unix socket used to fulfill local requests. */ extern int local_req_fd; +/* True if we are running the cache/restore, false if proxy/dump. */ +extern bool restoring; -int init_daemon(bool background, struct rimage *(*wfi)(struct wthread*)); - -void join_workers(void); -void unlock_workers(void); - -void prepare_recv_rimg(void); -void finalize_recv_rimg(struct rimage *rimg); -struct rimage *prepare_remote_image(char *path, char *namesapce, int flags); +void accept_image_connections(); struct rimage *get_rimg_by_name(const char *snapshot_id, const char *path); -bool is_receiving(void); - -void *accept_local_image_connections(void *ptr); -void *accept_remote_image_connections(void *ptr); -int64_t forward_image(struct rimage *rimg); int64_t send_image(int fd, struct rimage *rimg, int flags, bool image_check); int64_t send_image_async(struct roperation *op); int64_t recv_image(int fd, struct rimage *rimg, uint64_t size, int flags, bool image_check); From 53e8a31f4f23d6413ffcba2cee75337ca2103bdf Mon Sep 17 00:00:00 2001 From: Rodrigo Bruno Date: Mon, 14 May 2018 01:29:50 +0100 Subject: [PATCH 025/277] remote: Minor improvements on img-remote.c --- criu/img-remote.c | 155 +++++++++++++++++++++++----------------------- 1 file 
changed, 77 insertions(+), 78 deletions(-) diff --git a/criu/img-remote.c b/criu/img-remote.c index 218c29684b..13a56fc2c8 100644 --- a/criu/img-remote.c +++ b/criu/img-remote.c @@ -44,14 +44,26 @@ LIST_HEAD(rop_forwarding); // List of snapshots (useful when doing incremental restores/dumps LIST_HEAD(snapshot_head); +// Snapshot id (setup at launch time by dump or restore). static char *snapshot_id; -bool restoring = true; // TODO - check where this is used! -// TODO - split this into two vars, recv_from_proxy, send_to_cache -bool forwarding = false; // TODO - true if proxy_to_cache_fd is being used. + +// True if restoring (cache := true; proxy := false). +bool restoring = true; + +// True if the proxy to cache socket is being used (receiving or sending). +bool forwarding = false; + +// True if the local dump or restore is finished. bool finished_local = false; + +// True if the communication between the proxy and cache can be closed. bool finished_remote = false; + +// Proxy to cache socket fd; Local dump or restore servicing fd. int proxy_to_cache_fd; int local_req_fd; + +// Epoll fd and event array. 
int epoll_fd; struct epoll_event *events; @@ -455,7 +467,7 @@ static struct rimage *clear_remote_image(struct rimage *rimg) return rimg; } -void handle_accept_write( +struct roperation* handle_accept_write( int cli_fd, char* snapshot_id, char* path, int flags, bool close_fd, uint64_t size) { struct roperation *rop = NULL; @@ -482,25 +494,24 @@ void handle_accept_write( rop_set_rimg(rop, rimg); rop->size = size; - list_add_tail(&(rop->l), &rop_inprogress); - event_set(epoll_fd, EPOLL_CTL_ADD, rop->fd, EPOLLIN, rop); - return; + return rop; err: free(rimg); free(rop); + return NULL; } -void handle_accept_proxy_write( +struct roperation* handle_accept_proxy_write( int cli_fd, char* snapshot_id, char* path, int flags) { - handle_accept_write(cli_fd, snapshot_id, path, flags, true, 0); + return handle_accept_write(cli_fd, snapshot_id, path, flags, true, 0); } -void handle_accept_proxy_read( +struct roperation* handle_accept_proxy_read( int cli_fd, char* snapshot_id, char* path, int flags) { struct roperation *rop = NULL; - struct rimage *rimg = NULL; + struct rimage *rimg = NULL; rimg = get_rimg_by_name(snapshot_id, path); @@ -511,40 +522,40 @@ void handle_accept_proxy_read( pr_perror("Error writing reply header for unexisting image"); goto err; } + close(cli_fd); + return NULL; } - else { - if (write_reply_header(cli_fd, 0) < 0) { - pr_perror("Error writing reply header for %s:%s", - path, snapshot_id); - goto err; - } - rop = new_remote_operation(path, snapshot_id, cli_fd, flags, true); - if (rop == NULL) { - pr_perror("Error preparing remote operation"); - goto err; - } - rop_set_rimg(rop, rimg); - list_add_tail(&(rop->l), &rop_inprogress); - event_set(epoll_fd, EPOLL_CTL_ADD, rop->fd, EPOLLOUT, rop); + if (write_reply_header(cli_fd, 0) < 0) { + pr_perror("Error writing reply header for %s:%s", + path, snapshot_id); + goto err; } - return; + + rop = new_remote_operation(path, snapshot_id, cli_fd, flags, true); + if (rop == NULL) { + pr_perror("Error preparing remote 
operation"); + goto err; + } + + rop_set_rimg(rop, rimg); + return rop; err: close(cli_fd); + return NULL; } void finish_local() { int ret; finished_local = true; - //shutdown(local_req_fd, SHUT_RD); //TODO - should this be removed? ret = event_set(epoll_fd, EPOLL_CTL_DEL, local_req_fd, 0, 0); if (ret) { pr_perror("Failed to del local fd from epoll"); } } -void handle_accept_cache_read( +struct roperation* handle_accept_cache_read( int cli_fd, char* snapshot_id, char* path, int flags) { struct rimage *rimg = NULL; @@ -554,14 +565,14 @@ void handle_accept_cache_read( if (!strncmp(path, RESTORE_FINISH, sizeof(RESTORE_FINISH))) { close(cli_fd); finish_local(); - return; + return NULL; } rop = new_remote_operation(path, snapshot_id, cli_fd, flags, true); if (rop == NULL) { pr_perror("Error preparing remote operation"); close(cli_fd); - return; + return NULL; } // Check if we already have the image. @@ -574,28 +585,25 @@ void handle_accept_cache_read( close(rop->fd); } rop_set_rimg(rop, rimg); - list_add_tail(&(rop->l), &rop_inprogress); - event_set(epoll_fd, EPOLL_CTL_ADD, rop->fd, EPOLLOUT, rop); - } - // The file may exist in future. - else if (!finished_remote){ - list_add_tail(&(rop->l), &rop_pending); + return rop; } // The file does not exist. - else { + else if (finished_remote) { pr_info("No image %s:%s.\n", path, snapshot_id); if (write_reply_header(cli_fd, ENOENT) < 0) pr_perror("Error writing reply header for unexisting image"); free(rop); close(cli_fd); } + return NULL; } void forward_remote_image(struct roperation* rop) { uint64_t ret = 0; + // Set blocking during the setup. 
-// socket_set_blocking(rop->fd); // TODO - test + socket_set_blocking(rop->fd); ret = write_remote_header( rop->fd, rop->snapshot_id, rop->path, rop->flags, rop->size); @@ -614,7 +622,7 @@ void forward_remote_image(struct roperation* rop) // Go back to non-blocking -// socket_set_non_blocking(rop->fd); // TODO - test + socket_set_non_blocking(rop->fd); forwarding = true; event_set(epoll_fd, EPOLL_CTL_ADD, rop->fd, EPOLLOUT, rop); @@ -627,9 +635,10 @@ void handle_remote_accept(int fd) int flags; uint64_t size = 0; uint64_t ret; + struct roperation* rop = NULL; // Set blocking during the setup. -// socket_set_blocking(fd); // TODO - test! + socket_set_blocking(fd); ret = read_remote_header(fd, snapshot_id, path, &flags, &size); if (ret < 0) { @@ -644,7 +653,7 @@ void handle_remote_accept(int fd) } // Go back to non-blocking -// socket_set_non_blocking(fd); // TODO - test! + socket_set_non_blocking(fd); pr_info("[fd=%d] Received %s request for %s:%s with %lu bytes\n", fd, @@ -654,8 +663,13 @@ void handle_remote_accept(int fd) forwarding = true; - handle_accept_write(fd, snapshot_id, path, flags, false, size); - return; + rop = handle_accept_write(fd, snapshot_id, path, flags, false, size); + + if (rop != NULL) { + list_add_tail(&(rop->l), &rop_inprogress); + event_set(epoll_fd, EPOLL_CTL_ADD, rop->fd, EPOLLIN, rop); + } + return; err: close(fd); } @@ -668,6 +682,7 @@ void handle_local_accept(int fd) int flags = 0; struct sockaddr_in cli_addr; socklen_t clilen = sizeof(cli_addr); + struct roperation *rop = NULL; cli_fd = accept(fd, (struct sockaddr *) &cli_addr, &clilen); if (cli_fd < 0) { @@ -688,19 +703,32 @@ void handle_local_accept(int fd) // Write/Append case (only possible in img-proxy). if (flags != O_RDONLY) { - handle_accept_proxy_write(cli_fd, snapshot_id, path, flags); + rop = handle_accept_proxy_write(cli_fd, snapshot_id, path, flags); } // Read case while restoring (img-cache). 
else if (restoring) { - handle_accept_cache_read(cli_fd, snapshot_id, path, flags); + rop = handle_accept_cache_read(cli_fd, snapshot_id, path, flags); } // Read case while dumping (img-proxy). else { - handle_accept_proxy_read(cli_fd, snapshot_id, path, flags); + rop = handle_accept_proxy_read(cli_fd, snapshot_id, path, flags); } - // Set socket non-blocking. - socket_set_non_blocking(cli_fd); + // If we have an operation. Check if we are ready to start or not. + if (rop != NULL) { + if (rop->rimg != NULL) { + list_add_tail(&(rop->l), &rop_inprogress); + event_set( + epoll_fd, + EPOLL_CTL_ADD, + rop->fd, + rop->flags == O_RDONLY ? EPOLLOUT : EPOLLIN, + rop); + } else { + list_add_tail(&(rop->l), &rop_pending); + } + socket_set_non_blocking(rop->fd); + } return; err: @@ -758,7 +786,6 @@ void finish_cache_write(struct roperation* rop) // Add image to list of images. list_add_tail(&(rop->rimg->l), &rimg_head); - // TODO - what if we have multiple requests for the same name? if (prop != NULL) { pr_info("\t[fd=%d] Resuming pending %s for %s:%s\n", prop->fd, @@ -947,31 +974,17 @@ void accept_image_connections() { // If both local and remote sockets are closed, leave. if (finished_local && finished_remote) { - pr_info("\tFinished both local and remote, exiting\n"); + pr_info("Finished both local and remote, exiting\n"); goto end; } } end: - // TODO - release pending when no receiving and finished. close(epoll_fd); close(local_req_fd); free(events); } -int64_t recv_image(int fd, struct rimage *rimg, uint64_t size, int flags, bool close_fd) -{ - int ret; - struct roperation *op = new_remote_operation( - rimg->path, rimg->snapshot_id, fd, flags, close_fd); - rop_set_rimg(op, rimg); - while ((ret = recv_image_async(op)) < 0) - if (ret != EAGAIN && ret != EWOULDBLOCK) - return -1; - free(op); - return ret; -} - /* Note: size is a limit on how much we want to read from the socket. Zero means * read until the socket is closed. 
*/ @@ -1026,18 +1039,6 @@ int64_t recv_image_async(struct roperation *op) return n; } -int64_t send_image(int fd, struct rimage *rimg, int flags, bool close_fd) -{ - int ret; - struct roperation *op = new_remote_operation( - rimg->path, rimg->snapshot_id, fd, flags, close_fd); - rop_set_rimg(op, rimg); - while ((ret = send_image_async(op)) < 0) - if (ret != EAGAIN && ret != EWOULDBLOCK) - return -1; - return ret; -} - int64_t send_image_async(struct roperation *op) { int fd = op->fd; @@ -1058,7 +1059,6 @@ int64_t send_image_async(struct roperation *op) op->curr_sent_bytes = 0; return n; } - // TODO - cloudn't we just compare to the img size? else if (op->curr_sent_bytes == op->curr_sent_buf->nbytes) { if (close_fd) close(fd); @@ -1066,7 +1066,6 @@ int64_t send_image_async(struct roperation *op) } return n; } - // TODO - clouldn't these checks be made upstream? else if (errno == EPIPE || errno == ECONNRESET) { pr_warn("Connection for %s:%s was closed early than expected\n", rimg->path, rimg->snapshot_id); From b1ddec7d5af5447150b91571ed3e78427e0fb290 Mon Sep 17 00:00:00 2001 From: Rodrigo Bruno Date: Mon, 14 May 2018 01:29:51 +0100 Subject: [PATCH 026/277] remote: Fixing identation. 
--- criu/img-cache.c | 32 +-- criu/img-proxy.c | 10 +- criu/img-remote.c | 395 +++++++++++++++++++------------------- criu/include/img-remote.h | 6 +- 4 files changed, 221 insertions(+), 222 deletions(-) diff --git a/criu/img-cache.c b/criu/img-cache.c index c941f14e25..98fdf80e6a 100644 --- a/criu/img-cache.c +++ b/criu/img-cache.c @@ -10,23 +10,23 @@ int accept_proxy_to_cache(int sockfd) { - struct sockaddr_in cli_addr; - socklen_t clilen = sizeof(cli_addr); - int proxy_fd = accept(sockfd, (struct sockaddr *) &cli_addr, &clilen); + struct sockaddr_in cli_addr; + socklen_t clilen = sizeof(cli_addr); + int proxy_fd = accept(sockfd, (struct sockaddr *) &cli_addr, &clilen); - if (proxy_fd < 0) { - pr_perror("Unable to accept remote image connection from image proxy"); - return -1; - } + if (proxy_fd < 0) { + pr_perror("Unable to accept remote image connection from image proxy"); + return -1; + } - return proxy_fd; + return proxy_fd; } int image_cache(bool background, char *local_cache_path, unsigned short cache_write_port) { pr_info("Proxy to Cache Port %d, CRIU to Cache Path %s\n", cache_write_port, local_cache_path); - restoring = true; + restoring = true; if (opts.ps_socket != -1) { proxy_to_cache_fd = opts.ps_socket; @@ -37,13 +37,13 @@ int image_cache(bool background, char *local_cache_path, unsigned short cache_wr pr_perror("Unable to open proxy to cache TCP socket"); return -1; } - // Wait to accept connection from proxy. - proxy_to_cache_fd = accept_proxy_to_cache(proxy_to_cache_fd); - if (proxy_to_cache_fd < 0) - return -1; // TODO - should close other sockets. + // Wait to accept connection from proxy. + proxy_to_cache_fd = accept_proxy_to_cache(proxy_to_cache_fd); + if (proxy_to_cache_fd < 0) + return -1; // TODO - should close other sockets. 
} - pr_info("Cache is connected to Proxy through fd %d\n", proxy_to_cache_fd); + pr_info("Cache is connected to Proxy through fd %d\n", proxy_to_cache_fd); local_req_fd = setup_UNIX_server_socket(local_cache_path); if (local_req_fd < 0) { @@ -52,14 +52,14 @@ int image_cache(bool background, char *local_cache_path, unsigned short cache_wr } - if (background) { + if (background) { if (daemon(1, 0) == -1) { pr_perror("Can't run service server in the background"); return -1; } } - accept_image_connections(); + accept_image_connections(); pr_info("Finished image cache."); return 0; } diff --git a/criu/img-proxy.c b/criu/img-proxy.c index 9551a7dcbc..c4d442e67d 100644 --- a/criu/img-proxy.c +++ b/criu/img-proxy.c @@ -12,7 +12,7 @@ int image_proxy(bool background, char *local_proxy_path, char *fwd_host, unsigne { pr_info("CRIU to Proxy Path: %s, Cache Address %s:%hu\n", local_proxy_path, fwd_host, fwd_port); - restoring = false; + restoring = false; local_req_fd = setup_UNIX_server_socket(local_proxy_path); if (local_req_fd < 0) { @@ -31,17 +31,17 @@ int image_proxy(bool background, char *local_proxy_path, char *fwd_host, unsigne } } - pr_info("Proxy is connected to Cache through fd %d\n", proxy_to_cache_fd); + pr_info("Proxy is connected to Cache through fd %d\n", proxy_to_cache_fd); - if (background) { + if (background) { if (daemon(1, 0) == -1) { pr_perror("Can't run service server in the background"); return -1; } } - // TODO - local_req_fd and proxy_to_cache_fd send as args. - accept_image_connections(); + // TODO - local_req_fd and proxy_to_cache_fd send as args. 
+ accept_image_connections(); pr_info("Finished image proxy."); return 0; } diff --git a/criu/img-remote.c b/criu/img-remote.c index 13a56fc2c8..af4d88dcc0 100644 --- a/criu/img-remote.c +++ b/criu/img-remote.c @@ -99,7 +99,7 @@ struct rimage *get_rimg_by_name(const char *snapshot_id, const char *path) list_for_each_entry(rimg, &rimg_head, l) { if (!strncmp(rimg->path, path, PATHLEN) && - !strncmp(rimg->snapshot_id, snapshot_id, PATHLEN)) { + !strncmp(rimg->snapshot_id, snapshot_id, PATHLEN)) { return rimg; } } @@ -107,13 +107,13 @@ struct rimage *get_rimg_by_name(const char *snapshot_id, const char *path) } struct roperation *get_rop_by_name( - struct list_head *head, const char *snapshot_id, const char *path) + struct list_head *head, const char *snapshot_id, const char *path) { struct roperation *rop = NULL; list_for_each_entry(rop, head, l) { if (!strncmp(rop->path, path, PATHLEN) && - !strncmp(rop->snapshot_id, snapshot_id, PATHLEN)) { + !strncmp(rop->snapshot_id, snapshot_id, PATHLEN)) { return rop; } } @@ -137,7 +137,7 @@ int setup_TCP_server_socket(int port) serv_addr.sin_port = htons(port); if (setsockopt( - sockfd, SOL_SOCKET, SO_REUSEADDR, &sockopt, sizeof(sockopt)) == -1) { + sockfd, SOL_SOCKET, SO_REUSEADDR, &sockopt, sizeof(sockopt)) == -1) { pr_perror("Unable to set SO_REUSEADDR"); goto err; } @@ -181,8 +181,8 @@ int setup_TCP_client_socket(char *hostname, int port) bzero((char *) &serv_addr, sizeof(serv_addr)); serv_addr.sin_family = AF_INET; bcopy((char *) server->h_addr, - (char *) &serv_addr.sin_addr.s_addr, - server->h_length); + (char *) &serv_addr.sin_addr.s_addr, + server->h_length); serv_addr.sin_port = htons(port); if (connect(sockfd, (struct sockaddr *) &serv_addr, sizeof(serv_addr)) < 0) { @@ -216,7 +216,7 @@ void socket_set_non_blocking(int fd) if (flags < 0) { pr_perror("Failed to obtain flags from fd %d", fd); return; - } + } flags |= O_NONBLOCK; if (fcntl(fd, F_SETFL, flags) < 0) @@ -230,7 +230,7 @@ void socket_set_blocking(int fd) if 
(flags < 0) { pr_perror("Failed to obtain flags from fd %d", fd); return; - } + } flags &= (~O_NONBLOCK); if (fcntl(fd, F_SETFL, flags) < 0) @@ -405,29 +405,29 @@ static struct rimage *new_remote_image(char *path, char *snapshot_id) } static struct roperation *new_remote_operation( - char *path, char *snapshot_id, int cli_fd, int flags, bool close_fd) + char *path, char *snapshot_id, int cli_fd, int flags, bool close_fd) { - struct roperation *rop = calloc(1, sizeof(struct roperation)); + struct roperation *rop = calloc(1, sizeof(struct roperation)); - if (rop == NULL) { - pr_perror("Unable to allocate remote operation structures"); + if (rop == NULL) { + pr_perror("Unable to allocate remote operation structures"); return NULL; - } - strncpy(rop->path, path, PATHLEN -1 ); + } + strncpy(rop->path, path, PATHLEN -1 ); strncpy(rop->snapshot_id, snapshot_id, PATHLEN - 1); rop->path[PATHLEN - 1] = '\0'; rop->snapshot_id[PATHLEN - 1] = '\0'; - rop->fd = cli_fd; - rop->flags = flags; - rop->close_fd = close_fd; + rop->fd = cli_fd; + rop->flags = flags; + rop->close_fd = close_fd; - return rop; + return rop; } static void rop_set_rimg(struct roperation* rop, struct rimage* rimg) { - rop->rimg = rimg; - rop->size = rimg->size; + rop->rimg = rimg; + rop->size = rimg->size; if (rop->flags == O_APPEND) { // Image forward on append must start where the last fwd finished. 
if (rop->fd == proxy_to_cache_fd) { @@ -468,36 +468,36 @@ static struct rimage *clear_remote_image(struct rimage *rimg) } struct roperation* handle_accept_write( - int cli_fd, char* snapshot_id, char* path, int flags, bool close_fd, uint64_t size) + int cli_fd, char* snapshot_id, char* path, int flags, bool close_fd, uint64_t size) { - struct roperation *rop = NULL; + struct roperation *rop = NULL; struct rimage *rimg = get_rimg_by_name(snapshot_id, path); if (rimg == NULL) { rimg = new_remote_image(path, snapshot_id); - if (rimg == NULL) { - pr_perror("Error preparing remote image"); - goto err; - } - } - else { - list_del(&(rimg->l)); - if (flags == O_APPEND) - clear_remote_image(rimg); - } - - rop = new_remote_operation(path, snapshot_id, cli_fd, flags, close_fd); - if (rop == NULL) { - pr_perror("Error preparing remote operation"); - goto err; - } - - rop_set_rimg(rop, rimg); + if (rimg == NULL) { + pr_perror("Error preparing remote image"); + goto err; + } + } + else { + list_del(&(rimg->l)); + if (flags == O_APPEND) + clear_remote_image(rimg); + } + + rop = new_remote_operation(path, snapshot_id, cli_fd, flags, close_fd); + if (rop == NULL) { + pr_perror("Error preparing remote operation"); + goto err; + } + + rop_set_rimg(rop, rimg); rop->size = size; return rop; err: - free(rimg); - free(rop); + free(rimg); + free(rop); return NULL; } @@ -508,21 +508,21 @@ struct roperation* handle_accept_proxy_write( } struct roperation* handle_accept_proxy_read( - int cli_fd, char* snapshot_id, char* path, int flags) + int cli_fd, char* snapshot_id, char* path, int flags) { - struct roperation *rop = NULL; + struct roperation *rop = NULL; struct rimage *rimg = NULL; - rimg = get_rimg_by_name(snapshot_id, path); + rimg = get_rimg_by_name(snapshot_id, path); - // Check if we already have the image. + // Check if we already have the image. 
if (rimg == NULL) { pr_info("No image %s:%s.\n", path, snapshot_id); if (write_reply_header(cli_fd, ENOENT) < 0) { pr_perror("Error writing reply header for unexisting image"); - goto err; - } - close(cli_fd); + goto err; + } + close(cli_fd); return NULL; } @@ -532,16 +532,16 @@ struct roperation* handle_accept_proxy_read( goto err; } - rop = new_remote_operation(path, snapshot_id, cli_fd, flags, true); - if (rop == NULL) { + rop = new_remote_operation(path, snapshot_id, cli_fd, flags, true); + if (rop == NULL) { pr_perror("Error preparing remote operation"); - goto err; - } + goto err; + } rop_set_rimg(rop, rimg); return rop; err: - close(cli_fd); + close(cli_fd); return NULL; } @@ -556,10 +556,10 @@ void finish_local() } struct roperation* handle_accept_cache_read( - int cli_fd, char* snapshot_id, char* path, int flags) + int cli_fd, char* snapshot_id, char* path, int flags) { - struct rimage *rimg = NULL; - struct roperation *rop = NULL; + struct rimage *rimg = NULL; + struct roperation *rop = NULL; // Check if this is the restore finish message. if (!strncmp(path, RESTORE_FINISH, sizeof(RESTORE_FINISH))) { @@ -568,12 +568,12 @@ struct roperation* handle_accept_cache_read( return NULL; } - rop = new_remote_operation(path, snapshot_id, cli_fd, flags, true); - if (rop == NULL) { - pr_perror("Error preparing remote operation"); - close(cli_fd); + rop = new_remote_operation(path, snapshot_id, cli_fd, flags, true); + if (rop == NULL) { + pr_perror("Error preparing remote operation"); + close(cli_fd); return NULL; - } + } // Check if we already have the image. rimg = get_rimg_by_name(snapshot_id, path); @@ -581,10 +581,10 @@ struct roperation* handle_accept_cache_read( if (write_reply_header(cli_fd, 0) < 0) { pr_perror("Error writing reply header for %s:%s", path, snapshot_id); - free(rop); + free(rop); close(rop->fd); } - rop_set_rimg(rop, rimg); + rop_set_rimg(rop, rimg); return rop; } // The file does not exist. 
@@ -592,7 +592,7 @@ struct roperation* handle_accept_cache_read( pr_info("No image %s:%s.\n", path, snapshot_id); if (write_reply_header(cli_fd, ENOENT) < 0) pr_perror("Error writing reply header for unexisting image"); - free(rop); + free(rop); close(cli_fd); } return NULL; @@ -602,8 +602,8 @@ void forward_remote_image(struct roperation* rop) { uint64_t ret = 0; - // Set blocking during the setup. - socket_set_blocking(rop->fd); + // Set blocking during the setup. + socket_set_blocking(rop->fd); ret = write_remote_header( rop->fd, rop->snapshot_id, rop->path, rop->flags, rop->size); @@ -617,30 +617,30 @@ void forward_remote_image(struct roperation* rop) pr_info("[fd=%d] Fowarding %s request for %s:%s (%lu bytes\n", rop->fd, rop->flags == O_RDONLY ? "read" : - rop->flags == O_APPEND ? "append" : "write", + rop->flags == O_APPEND ? "append" : "write", rop->path, rop->snapshot_id, rop->size); - // Go back to non-blocking - socket_set_non_blocking(rop->fd); + // Go back to non-blocking + socket_set_non_blocking(rop->fd); forwarding = true; - event_set(epoll_fd, EPOLL_CTL_ADD, rop->fd, EPOLLOUT, rop); + event_set(epoll_fd, EPOLL_CTL_ADD, rop->fd, EPOLLOUT, rop); } void handle_remote_accept(int fd) { - char path[PATHLEN]; + char path[PATHLEN]; char snapshot_id[PATHLEN]; int flags; - uint64_t size = 0; - uint64_t ret; + uint64_t size = 0; + uint64_t ret; struct roperation* rop = NULL; - // Set blocking during the setup. - socket_set_blocking(fd); + // Set blocking during the setup. 
+ socket_set_blocking(fd); - ret = read_remote_header(fd, snapshot_id, path, &flags, &size); + ret = read_remote_header(fd, snapshot_id, path, &flags, &size); if (ret < 0) { pr_perror("Unable to receive remote header from image proxy"); goto err; @@ -652,18 +652,18 @@ void handle_remote_accept(int fd) return; } - // Go back to non-blocking - socket_set_non_blocking(fd); + // Go back to non-blocking + socket_set_non_blocking(fd); pr_info("[fd=%d] Received %s request for %s:%s with %lu bytes\n", fd, flags == O_RDONLY ? "read" : - flags == O_APPEND ? "append" : "write", + flags == O_APPEND ? "append" : "write", path, snapshot_id, size); forwarding = true; - rop = handle_accept_write(fd, snapshot_id, path, flags, false, size); + rop = handle_accept_write(fd, snapshot_id, path, flags, false, size); if (rop != NULL) { list_add_tail(&(rop->l), &rop_inprogress); @@ -671,15 +671,15 @@ void handle_remote_accept(int fd) } return; err: - close(fd); + close(fd); } void handle_local_accept(int fd) { int cli_fd; - char path[PATHLEN]; + char path[PATHLEN]; char snapshot_id[PATHLEN]; - int flags = 0; + int flags = 0; struct sockaddr_in cli_addr; socklen_t clilen = sizeof(cli_addr); struct roperation *rop = NULL; @@ -703,90 +703,90 @@ void handle_local_accept(int fd) // Write/Append case (only possible in img-proxy). if (flags != O_RDONLY) { - rop = handle_accept_proxy_write(cli_fd, snapshot_id, path, flags); + rop = handle_accept_proxy_write(cli_fd, snapshot_id, path, flags); } // Read case while restoring (img-cache). else if (restoring) { - rop = handle_accept_cache_read(cli_fd, snapshot_id, path, flags); + rop = handle_accept_cache_read(cli_fd, snapshot_id, path, flags); } // Read case while dumping (img-proxy). else { - rop = handle_accept_proxy_read(cli_fd, snapshot_id, path, flags); + rop = handle_accept_proxy_read(cli_fd, snapshot_id, path, flags); } - // If we have an operation. Check if we are ready to start or not. + // If we have an operation. 
Check if we are ready to start or not. if (rop != NULL) { if (rop->rimg != NULL) { - list_add_tail(&(rop->l), &rop_inprogress); - event_set( + list_add_tail(&(rop->l), &rop_inprogress); + event_set( epoll_fd, EPOLL_CTL_ADD, rop->fd, rop->flags == O_RDONLY ? EPOLLOUT : EPOLLIN, rop); } else { - list_add_tail(&(rop->l), &rop_pending); + list_add_tail(&(rop->l), &rop_pending); } socket_set_non_blocking(rop->fd); } - return; + return; err: - close(cli_fd); + close(cli_fd); } void finish_proxy_read(struct roperation* rop) { - // If finished forwarding image - if (rop->fd == proxy_to_cache_fd) { + // If finished forwarding image + if (rop->fd == proxy_to_cache_fd) { // Update fwd buffer and byte count on rimg. rop->rimg->curr_fwd_buf = rop->curr_sent_buf; rop->rimg->curr_fwd_bytes = rop->curr_sent_bytes; forwarding = false; - // If there are images waiting to be forwarded, forward the next. - if (!list_empty(&rop_forwarding)) { - forward_remote_image(list_entry(rop_forwarding.next, struct roperation, l)); - } - } + // If there are images waiting to be forwarded, forward the next. + if (!list_empty(&rop_forwarding)) { + forward_remote_image(list_entry(rop_forwarding.next, struct roperation, l)); + } + } } void finish_proxy_write(struct roperation* rop) { - // No more local images are comming. Close local socket. - if (!strncmp(rop->path, DUMP_FINISH, sizeof(DUMP_FINISH))) { - // TODO - couldn't we handle the DUMP_FINISH in inside handle_accept_proxy_write? + // No more local images are comming. Close local socket. + if (!strncmp(rop->path, DUMP_FINISH, sizeof(DUMP_FINISH))) { + // TODO - couldn't we handle the DUMP_FINISH in inside handle_accept_proxy_write? finish_local(); - } - // Normal image received, forward it. - else { - struct roperation *rop_to_forward = new_remote_operation( - rop->path, rop->snapshot_id, proxy_to_cache_fd, rop->flags, false); + } + // Normal image received, forward it. 
+ else { + struct roperation *rop_to_forward = new_remote_operation( + rop->path, rop->snapshot_id, proxy_to_cache_fd, rop->flags, false); - // Add image to list of images. + // Add image to list of images. list_add_tail(&(rop->rimg->l), &rimg_head); - rop_set_rimg(rop_to_forward, rop->rimg); - if (list_empty(&rop_forwarding)) { - forward_remote_image(rop_to_forward); - } - list_add_tail(&(rop_to_forward->l), &rop_forwarding); - } + rop_set_rimg(rop_to_forward, rop->rimg); + if (list_empty(&rop_forwarding)) { + forward_remote_image(rop_to_forward); + } + list_add_tail(&(rop_to_forward->l), &rop_forwarding); + } } void finish_cache_write(struct roperation* rop) { - struct roperation *prop = get_rop_by_name( - &rop_pending, rop->snapshot_id, rop->path); + struct roperation *prop = get_rop_by_name( + &rop_pending, rop->snapshot_id, rop->path); forwarding = false; - event_set(epoll_fd, EPOLL_CTL_ADD, proxy_to_cache_fd, EPOLLIN, &proxy_to_cache_fd); + event_set(epoll_fd, EPOLL_CTL_ADD, proxy_to_cache_fd, EPOLLIN, &proxy_to_cache_fd); - // Add image to list of images. + // Add image to list of images. list_add_tail(&(rop->rimg->l), &rimg_head); - if (prop != NULL) { + if (prop != NULL) { pr_info("\t[fd=%d] Resuming pending %s for %s:%s\n", prop->fd, prop->flags == O_APPEND ? 
@@ -799,15 +799,15 @@ void finish_cache_write(struct roperation* rop) pr_perror("Error writing reply header for %s:%s", prop->path, prop->snapshot_id); close(prop->fd); - free(prop); + free(prop); return; } - rop_set_rimg(prop, rop->rimg); - list_del(&(prop->l)); + rop_set_rimg(prop, rop->rimg); + list_del(&(prop->l)); list_add_tail(&(prop->l), &rop_inprogress); - event_set(epoll_fd, EPOLL_CTL_ADD, prop->fd, EPOLLOUT, prop); - } + event_set(epoll_fd, EPOLL_CTL_ADD, prop->fd, EPOLLOUT, prop); + } } void handle_roperation(struct epoll_event *event, struct roperation *rop) @@ -823,74 +823,73 @@ void handle_roperation(struct epoll_event *event, struct roperation *rop) rop->fd, event->events, rop); - return; + return; } - // Remove rop from list (either in progress or forwarding). - list_del(&(rop->l)); + // Remove rop from list (either in progress or forwarding). + list_del(&(rop->l)); - // Operation is finished. - if (ret < 0) { - pr_perror("Unable to %s %s:%s (returned %ld)", + // Operation is finished. + if (ret < 0) { + pr_perror("Unable to %s %s:%s (returned %ld)", event->events & EPOLLOUT ? "send" : "receive", - rop->rimg->path, rop->rimg->snapshot_id, ret); - goto err; - } else { - pr_info("[fd=%d] Finished %s %s:%s to CRIU (size %ld)\n", + rop->rimg->path, rop->rimg->snapshot_id, ret); + goto err; + } else { + pr_info("[fd=%d] Finished %s %s:%s to CRIU (size %ld)\n", rop->fd, event->events & EPOLLOUT ? "sending" : "receiving", rop->rimg->path, rop->rimg->snapshot_id, rop->rimg->size); - } - - // If receive operation is finished - if (event->events & EPOLLIN) { - - // Cached side (finished receiving forwarded image) - if (restoring) { - finish_cache_write(rop); - } - // Proxy side (finished receiving local image) - else { - finish_proxy_write(rop); - } - } - // If send operation if finished - else { - // Proxy side (Finished forwarding image or reading it locally). 
- if (!restoring) - finish_proxy_read(rop); - // Nothing to be done when a read is finished on the cache side. - } + } + + // If receive operation is finished + if (event->events & EPOLLIN) { + // Cached side (finished receiving forwarded image) + if (restoring) { + finish_cache_write(rop); + } + // Proxy side (finished receiving local image) + else { + finish_proxy_write(rop); + } + } + // If send operation if finished + else { + // Proxy side (Finished forwarding image or reading it locally). + if (!restoring) + finish_proxy_read(rop); + // Nothing to be done when a read is finished on the cache side. + } err: - free(rop); + free(rop); } void check_pending_forwards() { - struct roperation *rop = NULL; - struct rimage *rimg = NULL; + struct roperation *rop = NULL; + struct rimage *rimg = NULL; list_for_each_entry(rop, &rop_forwarding, l) { - rimg = get_rimg_by_name(rop->snapshot_id, rop->path); - if (rimg != NULL) { - rop_set_rimg(rop, rimg); + rimg = get_rimg_by_name(rop->snapshot_id, rop->path); + if (rimg != NULL) { + rop_set_rimg(rop, rimg); forward_remote_image(rop); return; - } + } } } void check_pending_reads() { - struct roperation *rop = NULL; - struct rimage *rimg = NULL; + struct roperation *rop = NULL; + struct rimage *rimg = NULL; list_for_each_entry(rop, &rop_pending, l) { - rimg = get_rimg_by_name(rop->snapshot_id, rop->path); - if (rimg != NULL) { - rop_set_rimg(rop, rimg); - event_set(epoll_fd, EPOLL_CTL_ADD, rop->fd, EPOLLOUT, rop); - } + rimg = get_rimg_by_name(rop->snapshot_id, rop->path); + if (rimg != NULL) { + rop_set_rimg(rop, rimg); + event_set(epoll_fd, EPOLL_CTL_ADD, rop->fd, EPOLLOUT, rop); + } } } @@ -915,16 +914,16 @@ void accept_image_connections() { goto end; } - // Only if we are restoring (cache-side) we need to add the remote sock to - // the epoll. 
- if (restoring) { - ret = event_set(epoll_fd, EPOLL_CTL_ADD, proxy_to_cache_fd, - EPOLLIN, &proxy_to_cache_fd); - if (ret) { - pr_perror("Failed to add proxy to cache fd to epoll"); - goto end; - } - } + // Only if we are restoring (cache-side) we need to add the remote sock to + // the epoll. + if (restoring) { + ret = event_set(epoll_fd, EPOLL_CTL_ADD, proxy_to_cache_fd, + EPOLLIN, &proxy_to_cache_fd); + if (ret) { + pr_perror("Failed to add proxy to cache fd to epoll"); + goto end; + } + } while (1) { int n_events = epoll_wait(epoll_fd, events, EPOLL_MAX_EVENTS, 250); @@ -934,33 +933,33 @@ void accept_image_connections() { } for (int i = 0; i < n_events; i++) { - // Accept from local dump/restore? + // Accept from local dump/restore? if (events[i].data.ptr == &local_req_fd) { if ( events[i].events & EPOLLHUP || - events[i].events & EPOLLERR) { + events[i].events & EPOLLERR) { if (!finished_local) pr_perror("Unable to accept more local image connections"); goto end; } handle_local_accept(local_req_fd); } - else if (restoring && !forwarding && events[i].data.ptr == &proxy_to_cache_fd) { - event_set(epoll_fd, EPOLL_CTL_DEL, proxy_to_cache_fd, 0, 0); - handle_remote_accept(proxy_to_cache_fd); - } + else if (restoring && !forwarding && events[i].data.ptr == &proxy_to_cache_fd) { + event_set(epoll_fd, EPOLL_CTL_DEL, proxy_to_cache_fd, 0, 0); + handle_remote_accept(proxy_to_cache_fd); + } else { struct roperation *rop = (struct roperation*)events[i].data.ptr; - event_set(epoll_fd, EPOLL_CTL_DEL, rop->fd, 0, 0); + event_set(epoll_fd, EPOLL_CTL_DEL, rop->fd, 0, 0); handle_roperation(&events[i], rop); } } - // Check if there are any pending operations - if (restoring) - check_pending_reads(); + // Check if there are any pending operations + if (restoring) + check_pending_reads(); else if (!forwarding) - check_pending_forwards(); + check_pending_forwards(); // Check if we can close the tcp socket (this will unblock the cache // to answer "no image" to restore). 
@@ -972,12 +971,12 @@ void accept_image_connections() { finished_remote = true; } - // If both local and remote sockets are closed, leave. - if (finished_local && finished_remote) { + // If both local and remote sockets are closed, leave. + if (finished_local && finished_remote) { pr_info("Finished both local and remote, exiting\n"); - goto end; + goto end; } - } + } end: close(epoll_fd); close(local_req_fd); @@ -1000,8 +999,8 @@ int64_t recv_image_async(struct roperation *op) n = read(fd, curr_buf->buffer + curr_buf->nbytes, size ? - min((int) (size - rimg->size), BUF_SIZE - curr_buf->nbytes) : - BUF_SIZE - curr_buf->nbytes); + min((int) (size - rimg->size), BUF_SIZE - curr_buf->nbytes) : + BUF_SIZE - curr_buf->nbytes); if (n == 0) { if (close_fd) close(fd); @@ -1055,7 +1054,7 @@ int64_t send_image_async(struct roperation *op) op->curr_sent_bytes += n; if (op->curr_sent_bytes == BUF_SIZE) { op->curr_sent_buf = - list_entry(op->curr_sent_buf->l.next, struct rbuf, l); + list_entry(op->curr_sent_buf->l.next, struct rbuf, l); op->curr_sent_bytes = 0; return n; } diff --git a/criu/include/img-remote.h b/criu/include/img-remote.h index 029857f70e..c32e6ea58f 100644 --- a/criu/include/img-remote.h +++ b/criu/include/img-remote.h @@ -32,7 +32,7 @@ struct rbuf { }; struct rimage { - /* Path and snapshot id identify the image. */ + /* Path and snapshot id identify the image. */ char path[PATHLEN]; char snapshot_id[PATHLEN]; /* List anchor. */ @@ -53,8 +53,8 @@ struct roperation { struct list_head l; /* File descriptor being used. */ int fd; - /* Path and snapshot id identify the required image. */ - char path[PATHLEN]; + /* Path and snapshot id identify the required image. */ + char path[PATHLEN]; char snapshot_id[PATHLEN]; /* Remote image being used (may be null if the operation is pending). 
*/ struct rimage *rimg; From f84538c73a26543d8d4c03d673e792f6f8de5221 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 15 May 2018 04:27:53 +0300 Subject: [PATCH 027/277] remote: a few minor fixes to make travis happy --- criu/img-remote.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/criu/img-remote.c b/criu/img-remote.c index af4d88dcc0..9a7eec6a9f 100644 --- a/criu/img-remote.c +++ b/criu/img-remote.c @@ -600,7 +600,7 @@ struct roperation* handle_accept_cache_read( void forward_remote_image(struct roperation* rop) { - uint64_t ret = 0; + int64_t ret = 0; // Set blocking during the setup. socket_set_blocking(rop->fd); @@ -614,7 +614,7 @@ void forward_remote_image(struct roperation* rop) return; } - pr_info("[fd=%d] Fowarding %s request for %s:%s (%lu bytes\n", + pr_info("[fd=%d] Fowarding %s request for %s:%s (%" PRIu64 " bytes\n", rop->fd, rop->flags == O_RDONLY ? "read" : rop->flags == O_APPEND ? "append" : "write", @@ -634,7 +634,7 @@ void handle_remote_accept(int fd) char snapshot_id[PATHLEN]; int flags; uint64_t size = 0; - uint64_t ret; + int64_t ret; struct roperation* rop = NULL; // Set blocking during the setup. @@ -655,7 +655,7 @@ void handle_remote_accept(int fd) // Go back to non-blocking socket_set_non_blocking(fd); - pr_info("[fd=%d] Received %s request for %s:%s with %lu bytes\n", + pr_info("[fd=%d] Received %s request for %s:%s with %" PRIu64 " bytes\n", fd, flags == O_RDONLY ? "read" : flags == O_APPEND ? "append" : "write", @@ -831,12 +831,12 @@ void handle_roperation(struct epoll_event *event, struct roperation *rop) // Operation is finished. if (ret < 0) { - pr_perror("Unable to %s %s:%s (returned %ld)", + pr_perror("Unable to %s %s:%s (returned %" PRId64 ")", event->events & EPOLLOUT ? 
"send" : "receive", rop->rimg->path, rop->rimg->snapshot_id, ret); goto err; } else { - pr_info("[fd=%d] Finished %s %s:%s to CRIU (size %ld)\n", + pr_info("[fd=%d] Finished %s %s:%s to CRIU (size %" PRIu64 ")\n", rop->fd, event->events & EPOLLOUT ? "sending" : "receiving", rop->rimg->path, rop->rimg->snapshot_id, rop->rimg->size); @@ -926,13 +926,15 @@ void accept_image_connections() { } while (1) { - int n_events = epoll_wait(epoll_fd, events, EPOLL_MAX_EVENTS, 250); + int n_events, i; + + n_events = epoll_wait(epoll_fd, events, EPOLL_MAX_EVENTS, 250); if (n_events < 0) { pr_perror("Failed to epoll wait"); goto end; } - for (int i = 0; i < n_events; i++) { + for (i = 0; i < n_events; i++) { // Accept from local dump/restore? if (events[i].data.ptr == &local_req_fd) { if ( events[i].events & EPOLLHUP || From 36b628bbcb9db4ef38b95ecdd896801bbdcae1ab Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Thu, 14 Jun 2018 08:57:38 +0100 Subject: [PATCH 028/277] remote: Remove unused constants Drop the constants for default cache host/port and page size because they are not used anywhere. 
Signed-off-by: Radostin Stoyanov Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: Andrei Vagin --- criu/include/img-remote.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/criu/include/img-remote.h b/criu/include/img-remote.h index c32e6ea58f..15e1bc5d5e 100644 --- a/criu/include/img-remote.h +++ b/criu/include/img-remote.h @@ -16,14 +16,9 @@ #define NULL_SNAPSHOT_ID "null" #define DEFAULT_CACHE_SOCKET "img-cache.sock" #define DEFAULT_PROXY_SOCKET "img-proxy.sock" -#define DEFAULT_CACHE_PORT 9996 -#define DEFAULT_CACHE_HOST "localhost" #define DEFAULT_LISTEN 50 -#ifndef PAGESIZE -#define PAGESIZE 4096 -#endif -#define BUF_SIZE PAGESIZE +#define BUF_SIZE 4096 struct rbuf { char buffer[BUF_SIZE]; From 1a8c65473f3b76d248c9920d494d6a5a680df042 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sat, 20 Jan 2018 18:11:34 +0000 Subject: [PATCH 029/277] crtools: Fix typo Signed-off-by: Radostin Stoyanov --- criu/crtools.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/crtools.c b/criu/crtools.c index b8e074f81a..9cdbb98e61 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -288,7 +288,7 @@ int main(int argc, char *argv[], char *envp[]) " cpuinfo dump writes cpu information into image file\n" " cpuinfo check validates cpu information read from image file\n" " image-proxy launch dump-side proxy to sent images\n" -" image-cache launch restore-side cache to reveive images\n" +" image-cache launch restore-side cache to receive images\n" ); if (usage_error) { From be22689de25c897461ebaa166a05fb7070104845 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Thu, 1 Feb 2018 19:04:09 +0300 Subject: [PATCH 030/277] criu: fix leaks detected by coverity scan 1) fix sfle memory leak on get_fle_for_scm error 2) fix gfd open descriptor leak on get_fle_for_scm error 3-6) fix buf memory leak on read and pwrite errors Signed-off-by: Pavel Tikhomirov --- criu/files-reg.c | 4 ++++ 1 file changed, 4 insertions(+) diff 
--git a/criu/files-reg.c b/criu/files-reg.c index 97dc7030eb..1b51d1088e 100644 --- a/criu/files-reg.c +++ b/criu/files-reg.c @@ -166,10 +166,12 @@ static int copy_chunk_from_file(int fd, int img, off_t off, size_t len) ret = pread(fd, buf, min_t(size_t, BUFSIZE, len), off); if (ret <= 0) { pr_perror("Can't read from ghost file"); + xfree(buf); return -1; } if (write(img, buf, ret) != ret) { pr_perror("Can't write to image"); + xfree(buf); return -1; } off += ret; @@ -244,10 +246,12 @@ static int copy_chunk_to_file(int img, int fd, off_t off, size_t len) ret = read(img, buf, min_t(size_t, BUFSIZE, len)); if (ret <= 0) { pr_perror("Can't read from image"); + xfree(buf); return -1; } if (pwrite(fd, buf, ret, off) != ret) { pr_perror("Can't write to file"); + xfree(buf); return -1; } } else { From 91babe78f97ea1757dcf7c24da844eb42e285daa Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 2 Jun 2018 00:02:51 +0300 Subject: [PATCH 031/277] test: make zdtm.py python2/python3 compatible Cc: Adrian Reber Signed-off-by: Andrei Vagin --- test/zdtm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/zdtm.py b/test/zdtm.py index 04ed5307d3..3063157556 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -1238,7 +1238,7 @@ def dump(self, action, opts=[]): if self.__remote: logdir = os.getcwd() + "/" + self.__dump_path + "/" + str(self.__iter) - print "Adding image cache" + print("Adding image cache") cache_opts = [self.__criu_bin, "image-cache", "--port", "12345", "-v4", "-o", logdir + "/image-cache.log", "-D", logdir] @@ -1246,7 +1246,7 @@ def dump(self, action, opts=[]): subprocess.Popen(cache_opts).pid time.sleep(1) - print "Adding image proxy" + print("Adding image proxy") proxy_opts = [self.__criu_bin, "image-proxy", "--port", "12345", "--address", "localhost", "-v4", "-o", logdir + "/image-proxy.log", From 2df652db853cdb14be9374f30f3ded988c101de0 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Fri, 16 Feb 2018 18:11:32 +0000 Subject: [PATCH 
032/277] crtools: image-{cache, proxy} requires address/port Show error message when image-{cache,proxy} is called without --port and image-proxy without --address argument. Signed-off-by: Radostin Stoyanov --- criu/crtools.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/criu/crtools.c b/criu/crtools.c index 9cdbb98e61..996ec2df1f 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -229,11 +229,21 @@ int main(int argc, char *argv[], char *envp[]) if (!strcmp(argv[optind], "page-server")) return cr_page_server(opts.daemon_mode, false, -1) != 0; - if (!strcmp(argv[optind], "image-cache")) + if (!strcmp(argv[optind], "image-cache")) { + if (!opts.port) + goto opt_port_missing; return image_cache(opts.daemon_mode, DEFAULT_CACHE_SOCKET, opts.port); + } - if (!strcmp(argv[optind], "image-proxy")) + if (!strcmp(argv[optind], "image-proxy")) { + if (!opts.addr) { + pr_msg("Error: address not specified\n"); + return 1; + } + if (!opts.port) + goto opt_port_missing; return image_proxy(opts.daemon_mode, DEFAULT_PROXY_SOCKET, opts.addr, opts.port); + } if (!strcmp(argv[optind], "service")) return cr_service(opts.daemon_mode); @@ -458,6 +468,10 @@ int main(int argc, char *argv[], char *envp[]) return 0; +opt_port_missing: + pr_msg("Error: port not specified\n"); + return 1; + opt_pid_missing: pr_msg("Error: pid not specified\n"); return 1; From 62c65391047c5d41baa59b06e8109e251d721dc6 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sun, 4 Feb 2018 08:22:59 +0300 Subject: [PATCH 033/277] criu: fix gcc-8 warnings criu/sk-packet.c:443:3: error: 'strncpy' output may be truncated copying 14 bytes from a string of length 15 strncpy(addr_spkt.sa_data, req.ifr_name, sa_data_size); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ criu/img-remote.c:383:3: error: 'strncpy' specified bound 4096 equals destination size strncpy(snapshot_id, li->snapshot_id, PATHLEN); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ criu/img-remote.c:384:3: error: 
'strncpy' specified bound 4096 equals destination size strncpy(path, li->name, PATHLEN); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ criu/files.c:288:3: error: 'strncpy' output may be truncated copying 4095 bytes from a string of length 4096 strncpy(buf, link->name, PATH_MAX - 1); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ criu/sk-unix.c:239:36: error: '/' directive output may be truncated writing 1 byte into a region of size between 0 and 4095 snprintf(path, sizeof(path), ".%s/%s", dir, sk->name); ^ criu/sk-unix.c:239:3: note: 'snprintf' output 3 or more bytes (assuming 4098) into a destination of size 4096 snprintf(path, sizeof(path), ".%s/%s", dir, sk->name); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ criu/mount.c:2563:3: error: 'strncpy' specified bound 4096 equals destination size strncpy(path, m->mountpoint, PATH_MAX); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ criu/cr-restore.c:3647:2: error: 'strncpy' specified bound 16 equals destination size strncpy(task_args->comm, core->tc->comm, sizeof(task_args->comm)); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Andrei Vagin --- criu/img-remote.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/criu/img-remote.c b/criu/img-remote.c index 9a7eec6a9f..f148e23f3d 100644 --- a/criu/img-remote.c +++ b/criu/img-remote.c @@ -345,8 +345,10 @@ static int64_t read_header(int fd, char *snapshot_id, char *path, int *flags) int ret = pb_read_obj(fd, (void **)&li, PB_LOCAL_IMAGE); if (ret > 0) { - strncpy(snapshot_id, li->snapshot_id, PATHLEN); - strncpy(path, li->name, PATHLEN); + strncpy(snapshot_id, li->snapshot_id, PATHLEN - 1); + snapshot_id[PATHLEN - 1] = 0; + strncpy(path, li->name, PATHLEN - 1); + path[PATHLEN - 1] = 0; *flags = li->open_mode; } free(li); From f2054ba25548c78f94b49a4c36a1477b5e66adbc Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 12 Jul 2018 23:41:42 +0300 Subject: [PATCH 034/277] remote: don't read from pointer after free CID 190778 (#1 of 1): 
Read from pointer after free (USE_AFTER_FREE) 7. deref_after_free: Dereferencing freed pointer rop. Signed-off-by: Andrei Vagin --- criu/img-remote.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/criu/img-remote.c b/criu/img-remote.c index f148e23f3d..a9140423b0 100644 --- a/criu/img-remote.c +++ b/criu/img-remote.c @@ -583,8 +583,8 @@ struct roperation* handle_accept_cache_read( if (write_reply_header(cli_fd, 0) < 0) { pr_perror("Error writing reply header for %s:%s", path, snapshot_id); - free(rop); close(rop->fd); + free(rop); } rop_set_rimg(rop, rimg); return rop; @@ -594,8 +594,8 @@ struct roperation* handle_accept_cache_read( pr_info("No image %s:%s.\n", path, snapshot_id); if (write_reply_header(cli_fd, ENOENT) < 0) pr_perror("Error writing reply header for unexisting image"); - free(rop); close(cli_fd); + free(rop); } return NULL; } From d170cca9a064d3820b965146a2ab8d380c057074 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Wed, 22 Aug 2018 20:54:31 +0100 Subject: [PATCH 035/277] img-proxy: Remove duplicated include Signed-off-by: Radostin Stoyanov Signed-off-by: Andrei Vagin --- criu/img-proxy.c | 1 - 1 file changed, 1 deletion(-) diff --git a/criu/img-proxy.c b/criu/img-proxy.c index c4d442e67d..b482284815 100644 --- a/criu/img-proxy.c +++ b/criu/img-proxy.c @@ -1,6 +1,5 @@ #include -#include "img-remote.h" #include "img-remote.h" #include "criu-log.h" #include From 383da037de17758ceb838fed1c4e5f407dd72279 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Wed, 22 Aug 2018 20:54:33 +0100 Subject: [PATCH 036/277] remote: Merge check_pending_{reads,forwards} Both functions check_pending_forwards() and check_pending_reads() have very similar functionality. 
This patch aims to reduce the duplication of code by merging both functions into check_pending() Signed-off-by: Radostin Stoyanov Signed-off-by: Andrei Vagin --- criu/img-remote.c | 30 +++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-) diff --git a/criu/img-remote.c b/criu/img-remote.c index a9140423b0..972b18dff1 100644 --- a/criu/img-remote.c +++ b/criu/img-remote.c @@ -866,22 +866,7 @@ void handle_roperation(struct epoll_event *event, struct roperation *rop) free(rop); } -void check_pending_forwards() -{ - struct roperation *rop = NULL; - struct rimage *rimg = NULL; - - list_for_each_entry(rop, &rop_forwarding, l) { - rimg = get_rimg_by_name(rop->snapshot_id, rop->path); - if (rimg != NULL) { - rop_set_rimg(rop, rimg); - forward_remote_image(rop); - return; - } - } -} - -void check_pending_reads() +void check_pending() { struct roperation *rop = NULL; struct rimage *rimg = NULL; @@ -890,7 +875,12 @@ void check_pending_reads() rimg = get_rimg_by_name(rop->snapshot_id, rop->path); if (rimg != NULL) { rop_set_rimg(rop, rimg); - event_set(epoll_fd, EPOLL_CTL_ADD, rop->fd, EPOLLOUT, rop); + if (restoring) { + event_set(epoll_fd, EPOLL_CTL_ADD, rop->fd, EPOLLOUT, rop); + } else { + forward_remote_image(rop); + return; + } } } } @@ -960,10 +950,8 @@ void accept_image_connections() { } // Check if there are any pending operations - if (restoring) - check_pending_reads(); - else if (!forwarding) - check_pending_forwards(); + if (restoring || !forwarding) + check_pending(); // Check if we can close the tcp socket (this will unblock the cache // to answer "no image" to restore). From 69ed73692fad35806e27dedf3ba49b14c49f9f5c Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Thu, 30 Aug 2018 13:10:31 +0100 Subject: [PATCH 037/277] remote: Replace PATHLEN with PATH_MAX The macro PATHLEN is redundant. It is defined such that its replacement token sequence is the token PATH_MAX. 
Signed-off-by: Radostin Stoyanov Acked-by: Adrian Reber Signed-off-by: Andrei Vagin --- criu/img-remote.c | 58 +++++++++++++++++++-------------------- criu/include/img-remote.h | 9 +++--- 2 files changed, 33 insertions(+), 34 deletions(-) diff --git a/criu/img-remote.c b/criu/img-remote.c index 972b18dff1..579f5ed1d4 100644 --- a/criu/img-remote.c +++ b/criu/img-remote.c @@ -24,7 +24,7 @@ #include "protobuf.h" #include "image.h" -#define PB_LOCAL_IMAGE_SIZE PATHLEN +#define PB_LOCAL_IMAGE_SIZE PATH_MAX #define EPOLL_MAX_EVENTS 50 // List of images already in memory. @@ -72,7 +72,7 @@ struct epoll_event *events; * ID which corresponds to the working directory specified by the user. */ struct snapshot { - char snapshot_id[PATHLEN]; + char snapshot_id[PATH_MAX]; struct list_head l; }; @@ -83,8 +83,8 @@ struct snapshot *new_snapshot(char *snapshot_id) if (!s) return NULL; - strncpy(s->snapshot_id, snapshot_id, PATHLEN - 1); - s->snapshot_id[PATHLEN - 1]= '\0'; + strncpy(s->snapshot_id, snapshot_id, PATH_MAX - 1); + s->snapshot_id[PATH_MAX - 1]= '\0'; return s; } @@ -98,8 +98,8 @@ struct rimage *get_rimg_by_name(const char *snapshot_id, const char *path) struct rimage *rimg = NULL; list_for_each_entry(rimg, &rimg_head, l) { - if (!strncmp(rimg->path, path, PATHLEN) && - !strncmp(rimg->snapshot_id, snapshot_id, PATHLEN)) { + if (!strncmp(rimg->path, path, PATH_MAX) && + !strncmp(rimg->snapshot_id, snapshot_id, PATH_MAX)) { return rimg; } } @@ -112,8 +112,8 @@ struct roperation *get_rop_by_name( struct roperation *rop = NULL; list_for_each_entry(rop, head, l) { - if (!strncmp(rop->path, path, PATHLEN) && - !strncmp(rop->snapshot_id, snapshot_id, PATHLEN)) { + if (!strncmp(rop->path, path, PATH_MAX) && + !strncmp(rop->snapshot_id, snapshot_id, PATH_MAX)) { return rop; } } @@ -345,10 +345,10 @@ static int64_t read_header(int fd, char *snapshot_id, char *path, int *flags) int ret = pb_read_obj(fd, (void **)&li, PB_LOCAL_IMAGE); if (ret > 0) { - strncpy(snapshot_id, 
li->snapshot_id, PATHLEN - 1); - snapshot_id[PATHLEN - 1] = 0; - strncpy(path, li->name, PATHLEN - 1); - path[PATHLEN - 1] = 0; + strncpy(snapshot_id, li->snapshot_id, PATH_MAX - 1); + snapshot_id[PATH_MAX - 1] = 0; + strncpy(path, li->name, PATH_MAX - 1); + path[PATH_MAX - 1] = 0; *flags = li->open_mode; } free(li); @@ -372,8 +372,8 @@ int64_t read_remote_header(int fd, char *snapshot_id, char *path, int *flags, ui int ret = pb_read_obj(fd, (void **)&ri, PB_REMOTE_IMAGE); if (ret > 0) { - strncpy(snapshot_id, ri->snapshot_id, PATHLEN); - strncpy(path, ri->name, PATHLEN); + strncpy(snapshot_id, ri->snapshot_id, PATH_MAX); + strncpy(path, ri->name, PATH_MAX); *flags = ri->open_mode; *size = ri->size; } @@ -391,10 +391,10 @@ static struct rimage *new_remote_image(char *path, char *snapshot_id) goto err; } - strncpy(rimg->path, path, PATHLEN -1 ); - strncpy(rimg->snapshot_id, snapshot_id, PATHLEN - 1); - rimg->path[PATHLEN - 1] = '\0'; - rimg->snapshot_id[PATHLEN - 1] = '\0'; + strncpy(rimg->path, path, PATH_MAX -1 ); + strncpy(rimg->snapshot_id, snapshot_id, PATH_MAX - 1); + rimg->path[PATH_MAX - 1] = '\0'; + rimg->snapshot_id[PATH_MAX - 1] = '\0'; INIT_LIST_HEAD(&(rimg->buf_head)); list_add_tail(&(buf->l), &(rimg->buf_head)); rimg->curr_fwd_buf = buf; @@ -415,10 +415,10 @@ static struct roperation *new_remote_operation( pr_perror("Unable to allocate remote operation structures"); return NULL; } - strncpy(rop->path, path, PATHLEN -1 ); - strncpy(rop->snapshot_id, snapshot_id, PATHLEN - 1); - rop->path[PATHLEN - 1] = '\0'; - rop->snapshot_id[PATHLEN - 1] = '\0'; + strncpy(rop->path, path, PATH_MAX -1 ); + strncpy(rop->snapshot_id, snapshot_id, PATH_MAX - 1); + rop->path[PATH_MAX - 1] = '\0'; + rop->snapshot_id[PATH_MAX - 1] = '\0'; rop->fd = cli_fd; rop->flags = flags; rop->close_fd = close_fd; @@ -632,8 +632,8 @@ void forward_remote_image(struct roperation* rop) void handle_remote_accept(int fd) { - char path[PATHLEN]; - char snapshot_id[PATHLEN]; + char 
path[PATH_MAX]; + char snapshot_id[PATH_MAX]; int flags; uint64_t size = 0; int64_t ret; @@ -679,8 +679,8 @@ void handle_remote_accept(int fd) void handle_local_accept(int fd) { int cli_fd; - char path[PATHLEN]; - char snapshot_id[PATHLEN]; + char path[PATH_MAX]; + char snapshot_id[PATH_MAX]; int flags = 0; struct sockaddr_in cli_addr; socklen_t clilen = sizeof(cli_addr); @@ -1228,12 +1228,12 @@ int push_snapshot_id(void) return -1; } - rn.snapshot_id = xmalloc(sizeof(char) * PATHLEN); + rn.snapshot_id = xmalloc(sizeof(char) * PATH_MAX); if (!rn.snapshot_id) { close(sockfd); return -1; } - strncpy(rn.snapshot_id, snapshot_id, PATHLEN); + strncpy(rn.snapshot_id, snapshot_id, PATH_MAX); n = pb_write_obj(sockfd, &rn, PB_SNAPSHOT_ID); @@ -1261,7 +1261,7 @@ int get_curr_snapshot_id_idx(void) pull_snapshot_ids(); list_for_each_entry(si, &snapshot_head, l) { - if (!strncmp(si->snapshot_id, snapshot_id, PATHLEN)) + if (!strncmp(si->snapshot_id, snapshot_id, PATH_MAX)) return idx; idx++; } diff --git a/criu/include/img-remote.h b/criu/include/img-remote.h index 15e1bc5d5e..e19d06736f 100644 --- a/criu/include/img-remote.h +++ b/criu/include/img-remote.h @@ -9,7 +9,6 @@ #ifndef IMAGE_REMOTE_H #define IMAGE_REMOTE_H -#define PATHLEN PATH_MAX #define DUMP_FINISH "DUMP_FINISH" #define RESTORE_FINISH "RESTORE_FINISH" #define PARENT_IMG "parent" @@ -28,8 +27,8 @@ struct rbuf { struct rimage { /* Path and snapshot id identify the image. */ - char path[PATHLEN]; - char snapshot_id[PATHLEN]; + char path[PATH_MAX]; + char snapshot_id[PATH_MAX]; /* List anchor. */ struct list_head l; /* List of buffers that compose the image. */ @@ -49,8 +48,8 @@ struct roperation { /* File descriptor being used. */ int fd; /* Path and snapshot id identify the required image. */ - char path[PATHLEN]; - char snapshot_id[PATHLEN]; + char path[PATH_MAX]; + char snapshot_id[PATH_MAX]; /* Remote image being used (may be null if the operation is pending). */ struct rimage *rimg; /* Flags for the operation. 
*/ From 68d49245e8311d4a079f5ba277d25df356efbb52 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Thu, 13 Sep 2018 18:28:05 +0100 Subject: [PATCH 038/277] remote: Refactor TCP server setup The function `setup_TCP_server_socket` (defined in img-remote.c) and `setup_tcp_server` (defined in util.c) have very similar functionality. Replace setup_TCP_server_socket() with setup_tcp_server() to reduce code duplication and to enable IPv6 support for the image-cache action of CRIU. We set SO_REUSEADDR flag to allow reuse of local addresses. Signed-off-by: Radostin Stoyanov Signed-off-by: Andrei Vagin --- criu/img-cache.c | 3 ++- criu/img-remote.c | 40 --------------------------------------- criu/include/img-remote.h | 1 - 3 files changed, 2 insertions(+), 42 deletions(-) diff --git a/criu/img-cache.c b/criu/img-cache.c index 98fdf80e6a..b93874c9f9 100644 --- a/criu/img-cache.c +++ b/criu/img-cache.c @@ -7,6 +7,7 @@ #include #include #include "cr_options.h" +#include "util.h" int accept_proxy_to_cache(int sockfd) { @@ -32,7 +33,7 @@ int image_cache(bool background, char *local_cache_path, unsigned short cache_wr proxy_to_cache_fd = opts.ps_socket; pr_info("Re-using ps socket %d\n", proxy_to_cache_fd); } else { - proxy_to_cache_fd = setup_TCP_server_socket(cache_write_port); + proxy_to_cache_fd = setup_tcp_server("image cache", NULL, &cache_write_port); if (proxy_to_cache_fd < 0) { pr_perror("Unable to open proxy to cache TCP socket"); return -1; diff --git a/criu/img-remote.c b/criu/img-remote.c index 579f5ed1d4..ce7f1a670f 100644 --- a/criu/img-remote.c +++ b/criu/img-remote.c @@ -120,46 +120,6 @@ struct roperation *get_rop_by_name( return NULL; } -int setup_TCP_server_socket(int port) -{ - struct sockaddr_in serv_addr; - int sockopt = 1; - int sockfd = socket(AF_INET, SOCK_STREAM, 0); - - if (sockfd < 0) { - pr_perror("Unable to open image socket"); - return -1; - } - - bzero((char *) &serv_addr, sizeof(serv_addr)); - serv_addr.sin_family = AF_INET; - 
serv_addr.sin_addr.s_addr = INADDR_ANY; - serv_addr.sin_port = htons(port); - - if (setsockopt( - sockfd, SOL_SOCKET, SO_REUSEADDR, &sockopt, sizeof(sockopt)) == -1) { - pr_perror("Unable to set SO_REUSEADDR"); - goto err; - } - - if (bind(sockfd, (struct sockaddr *) &serv_addr, sizeof(serv_addr)) < 0) { - pr_perror("Unable to bind image socket"); - goto err; - } - - if (listen(sockfd, DEFAULT_LISTEN)) { - pr_perror("Unable to listen image socket"); - goto err; - } - - return sockfd; -err: - close(sockfd); - return -1; -} - - - int setup_TCP_client_socket(char *hostname, int port) { int sockfd; diff --git a/criu/include/img-remote.h b/criu/include/img-remote.h index e19d06736f..d6297024a7 100644 --- a/criu/include/img-remote.h +++ b/criu/include/img-remote.h @@ -84,7 +84,6 @@ int64_t recv_image_async(struct roperation *op); int64_t read_remote_header(int fd, char *snapshot_id, char *path, int *open_mode, uint64_t *size); int64_t write_remote_header(int fd, char *snapshot_id, char *path, int open_mode, uint64_t size); -int setup_TCP_server_socket(int port); int setup_TCP_client_socket(char *hostname, int port); int setup_UNIX_server_socket(char *path); void socket_set_non_blocking(int fd); From a713c2cefbdaea017101e0c4569b883ed587b70b Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Thu, 13 Sep 2018 18:28:08 +0100 Subject: [PATCH 039/277] remote: Refactor TCP client setup Remove setup_TCP_client_socket() and use setup_tcp_server() instead as both functions have very similar functionality. 
Signed-off-by: Radostin Stoyanov Signed-off-by: Andrei Vagin --- criu/img-proxy.c | 3 ++- criu/img-remote.c | 36 ------------------------------------ criu/include/img-remote.h | 1 - criu/include/util.h | 2 +- criu/page-xfer.c | 2 +- criu/util.c | 8 ++++---- 6 files changed, 8 insertions(+), 44 deletions(-) diff --git a/criu/img-proxy.c b/criu/img-proxy.c index b482284815..65490d29f1 100644 --- a/criu/img-proxy.c +++ b/criu/img-proxy.c @@ -6,6 +6,7 @@ #include #include #include "cr_options.h" +#include "util.h" int image_proxy(bool background, char *local_proxy_path, char *fwd_host, unsigned short fwd_port) { @@ -23,7 +24,7 @@ int image_proxy(bool background, char *local_proxy_path, char *fwd_host, unsigne proxy_to_cache_fd = opts.ps_socket; pr_info("Re-using ps socket %d\n", proxy_to_cache_fd); } else { - proxy_to_cache_fd = setup_TCP_client_socket(fwd_host, fwd_port); + proxy_to_cache_fd = setup_tcp_client(fwd_host, fwd_port); if (proxy_to_cache_fd < 0) { pr_perror("Unable to open proxy to cache TCP socket"); return -1; // TODO - should close other sockets. 
diff --git a/criu/img-remote.c b/criu/img-remote.c index ce7f1a670f..a83ce38f10 100644 --- a/criu/img-remote.c +++ b/criu/img-remote.c @@ -120,42 +120,6 @@ struct roperation *get_rop_by_name( return NULL; } -int setup_TCP_client_socket(char *hostname, int port) -{ - int sockfd; - struct sockaddr_in serv_addr; - struct hostent *server; - - sockfd = socket(AF_INET, SOCK_STREAM, 0); - if (sockfd < 0) { - pr_perror("Unable to open remote image socket"); - return -1; - } - - server = gethostbyname(hostname); - if (server == NULL) { - pr_perror("Unable to get host by name (%s)", hostname); - goto err; - } - - bzero((char *) &serv_addr, sizeof(serv_addr)); - serv_addr.sin_family = AF_INET; - bcopy((char *) server->h_addr, - (char *) &serv_addr.sin_addr.s_addr, - server->h_length); - serv_addr.sin_port = htons(port); - - if (connect(sockfd, (struct sockaddr *) &serv_addr, sizeof(serv_addr)) < 0) { - pr_perror("Unable to connect to remote %s", hostname); - goto err; - } - - return sockfd; -err: - close(sockfd); - return -1; -} - int event_set(int epoll_fd, int op, int fd, uint32_t events, void *data) { int ret; diff --git a/criu/include/img-remote.h b/criu/include/img-remote.h index d6297024a7..ec4b15e7c3 100644 --- a/criu/include/img-remote.h +++ b/criu/include/img-remote.h @@ -84,7 +84,6 @@ int64_t recv_image_async(struct roperation *op); int64_t read_remote_header(int fd, char *snapshot_id, char *path, int *open_mode, uint64_t *size); int64_t write_remote_header(int fd, char *snapshot_id, char *path, int open_mode, uint64_t size); -int setup_TCP_client_socket(char *hostname, int port); int setup_UNIX_server_socket(char *path); void socket_set_non_blocking(int fd); diff --git a/criu/include/util.h b/criu/include/util.h index 313aacd8c2..0fa03c7d8c 100644 --- a/criu/include/util.h +++ b/criu/include/util.h @@ -294,7 +294,7 @@ void print_data(unsigned long addr, unsigned char *data, size_t size); int setup_tcp_server(char *type, char *addr, unsigned short *port); int 
run_tcp_server(bool daemon_mode, int *ask, int cfd, int sk); -int setup_tcp_client(char *hostname); +int setup_tcp_client(char *hostname, unsigned short port); #define LAST_PID_PATH "sys/kernel/ns_last_pid" #define PID_MAX_PATH "sys/kernel/pid_max" diff --git a/criu/page-xfer.c b/criu/page-xfer.c index 3e89feed9e..67abbf681d 100644 --- a/criu/page-xfer.c +++ b/criu/page-xfer.c @@ -1098,7 +1098,7 @@ static int connect_to_page_server(void) goto out; } - page_server_sk = setup_tcp_client(opts.addr); + page_server_sk = setup_tcp_client(opts.addr, opts.port); if (page_server_sk == -1) return -1; diff --git a/criu/util.c b/criu/util.c index 256fa15941..7a31828b96 100644 --- a/criu/util.c +++ b/criu/util.c @@ -1206,7 +1206,7 @@ int run_tcp_server(bool daemon_mode, int *ask, int cfd, int sk) return -1; } -int setup_tcp_client(char *hostname) +int setup_tcp_client(char *hostname, unsigned short port) { struct sockaddr_storage saddr; struct addrinfo addr_criteria, *addr_list, *p; @@ -1243,9 +1243,9 @@ int setup_tcp_client(char *hostname) } inet_ntop(p->ai_family, ip, ipstr, sizeof(ipstr)); - pr_info("Connecting to server %s:%u\n", ipstr, opts.port); + pr_info("Connecting to server %s:%u\n", ipstr, port); - if (get_sockaddr_in(&saddr, ipstr, opts.port)) + if (get_sockaddr_in(&saddr, ipstr, port)) goto out; sk = socket(saddr.ss_family, SOCK_STREAM, IPPROTO_TCP); @@ -1255,7 +1255,7 @@ int setup_tcp_client(char *hostname) } if (connect(sk, (struct sockaddr *)&saddr, sizeof(saddr)) < 0) { - pr_info("Can't connect to server %s:%u\n", ipstr, opts.port); + pr_info("Can't connect to server %s:%u\n", ipstr, port); close(sk); sk = -1; } else { From b99a2743f6ee9533b0535fc2e464408223f15d06 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Thu, 13 Sep 2018 18:28:09 +0100 Subject: [PATCH 040/277] remote: pull_snapshot_ids: Simplify if condition Check only once if (sockfd < 0) Signed-off-by: Radostin Stoyanov Signed-off-by: Andrei Vagin --- criu/img-remote.c | 9 +++++---- 1 file 
changed, 5 insertions(+), 4 deletions(-) diff --git a/criu/img-remote.c b/criu/img-remote.c index a83ce38f10..9805c74179 100644 --- a/criu/img-remote.c +++ b/criu/img-remote.c @@ -1109,11 +1109,12 @@ static int pull_snapshot_ids(void) sockfd = read_remote_image_connection(NULL_SNAPSHOT_ID, PARENT_IMG); /* The connection was successful but there is not file. */ - if (sockfd < 0 && errno == ENOENT) + if (sockfd < 0) { + if (errno != ENOENT) { + pr_err("Unable to open snapshot id read connection\n"); + return -1; + } return 0; - else if (sockfd < 0) { - pr_err("Unable to open snapshot id read connection\n"); - return -1; } while (1) { From 3b9babcb71dec94e8b00f73caf688144d4d515e8 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Thu, 13 Sep 2018 18:28:10 +0100 Subject: [PATCH 041/277] remote: Use tmp file buffer when restore ip dump When CRIU calls the ip tool on restore, it passes the fd of remote socket by replacing the STDIN before execvp. The stdin is used by the ip tool to receive input. However, the ip tool calls ftell(stdin) which fails with "Illegal seek" since UNIX sockets do not support file positioning operations. To resolve this issue, read the received content from the UNIX socket and store it into temporary file, then replace STDIN with the fd of this tmp file. 
# python test/zdtm.py run -t zdtm/static/env00 --remote -f ns === Run 1/1 ================ zdtm/static/env00 ========================= Run zdtm/static/env00 in ns ========================== Start test ./env00 --pidfile=env00.pid --outfile=env00.out --envname=ENV_00_TEST Adding image cache Adding image proxy Run criu dump Run criu restore =[log]=> dump/zdtm/static/env00/31/1/restore.log ------------------------ grep Error ------------------------ RTNETLINK answers: File exists (00.229895) 1: do_open_remote_image RDONLY path=route-9.img snapshot_id=dump/zdtm/static/env00/31/1 (00.230316) 1: Running ip route restore Failed to restore: ftell: Illegal seek (00.232757) 1: Error (criu/util.c:712): exited, status=255 (00.232777) 1: Error (criu/net.c:1479): IP tool failed on route restore (00.232803) 1: Error (criu/net.c:2153): Can't create net_ns (00.255091) Error (criu/cr-restore.c:1177): 105 killed by signal 9: Killed (00.255307) Error (criu/mount.c:2960): mnt: Can't remove the directory /tmp/.criu.mntns.dTd7ak: No such file or directory (00.255339) Error (criu/cr-restore.c:2119): Restoring FAILED. 
------------------------ ERROR OVER ------------------------ ################# Test zdtm/static/env00 FAIL at CRIU restore ################## ##################################### FAIL ##################################### Fixes #311 Signed-off-by: Radostin Stoyanov Signed-off-by: Andrei Vagin --- criu/net.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/criu/net.c b/criu/net.c index 44b0ce2242..b9f6669c3c 100644 --- a/criu/net.c +++ b/criu/net.c @@ -1919,19 +1919,46 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds) static int restore_ip_dump(int type, int pid, char *cmd) { - int ret = -1; + int ret = -1, sockfd, n, written; + FILE *tmp_file; struct cr_img *img; + char buf[1024]; img = open_image(type, O_RSTR, pid); if (empty_image(img)) { close_image(img); return 0; } + sockfd = img_raw_fd(img); + tmp_file = tmpfile(); + if (!tmp_file) { + pr_perror("Failed to open tmpfile"); + return -1; + } + + while ((n = read(sockfd, buf, 1024)) > 0) { + written = fwrite(buf, sizeof(char), n, tmp_file); + if (written < n) { + pr_perror("Failed to write to tmpfile " + "[written: %d; total: %d]", written, n); + return -1; + } + } + + if (fseek(tmp_file, 0, SEEK_SET)) { + pr_perror("Failed to set file position to beginning of tmpfile"); + return -1; + } + if (img) { - ret = run_ip_tool(cmd, "restore", NULL, NULL, img_raw_fd(img), -1, 0); + ret = run_ip_tool(cmd, "restore", NULL, NULL, fileno(tmp_file), -1, 0); close_image(img); } + if(fclose(tmp_file)) { + pr_perror("Failed to close tmpfile"); + } + return ret; } From ae5a850d1bda2ca907a45c49122135b03b54b969 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 23 Sep 2018 15:31:54 +0100 Subject: [PATCH 042/277] python: Replace xrange with range In Py2 `range` returns a list and `xrange` creates a sequence object that evaluates lazily. In Py3 `range` is equivalent to `xrange` in Py2. 
Signed-off-by: Radostin Stoyanov Signed-off-by: Andrei Vagin --- scripts/criu-ns | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/criu-ns b/scripts/criu-ns index e7ebbf0ca2..0910f2a336 100755 --- a/scripts/criu-ns +++ b/scripts/criu-ns @@ -124,7 +124,7 @@ def wrap_restore(): def get_varg(args): - for i in xrange(1, len(sys.argv)): + for i in range(1, len(sys.argv)): if not sys.argv[i] in args: continue From 2fffe9cd518e73e822d5a3984738d3e80376a338 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Fri, 28 Sep 2018 16:11:33 +0300 Subject: [PATCH 043/277] image: fix typo in debug message Signed-off-by: Pavel Tikhomirov Signed-off-by: Andrei Vagin --- criu/image.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/image.c b/criu/image.c index 8863952126..c21ac17741 100644 --- a/criu/image.c +++ b/criu/image.c @@ -420,7 +420,7 @@ int do_open_remote_image(int dfd, char *path, int flags) path, snapshot_id); ret = read_remote_image_connection(snapshot_id, path); } else { - pr_debug("do_open_remote_image WDONLY path=%s snapshot_id=%s\n", + pr_debug("do_open_remote_image WRONLY path=%s snapshot_id=%s\n", path, snapshot_id); ret = write_remote_image_connection(snapshot_id, path, O_WRONLY); } From 85f1a3e71e1a000a9d774cdec10ec136da5f5c69 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 24 Jan 2020 10:38:26 -0800 Subject: [PATCH 044/277] py: Reformat everything into pep8 style As discussed on the mailing list, the current formatting of the .py files does not conform to the world standard, so we had better reformat it. For this the yapf tool is used.
The command I used was yapf -i $(find -name *.py) Signed-off-by: Pavel Emelyanov --- test/zdtm.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/test/zdtm.py b/test/zdtm.py index 3063157556..9b93a51e73 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -1237,20 +1237,25 @@ def dump(self, action, opts=[]): a_opts += self.__test.getdopts() if self.__remote: - logdir = os.getcwd() + "/" + self.__dump_path + "/" + str(self.__iter) + logdir = os.getcwd() + "/" + self.__dump_path + "/" + str( + self.__iter) print("Adding image cache") - cache_opts = [self.__criu_bin, "image-cache", "--port", "12345", "-v4", "-o", - logdir + "/image-cache.log", "-D", logdir] + cache_opts = [ + self.__criu_bin, "image-cache", "--port", "12345", "-v4", "-o", + logdir + "/image-cache.log", "-D", logdir + ] subprocess.Popen(cache_opts).pid time.sleep(1) print("Adding image proxy") - proxy_opts = [self.__criu_bin, "image-proxy", "--port", "12345", "--address", - "localhost", "-v4", "-o", logdir + "/image-proxy.log", - "-D", logdir] + proxy_opts = [ + self.__criu_bin, "image-proxy", "--port", "12345", "--address", + "localhost", "-v4", "-o", logdir + "/image-proxy.log", "-D", + logdir + ] subprocess.Popen(proxy_opts).pid time.sleep(1) From 0dbf262d9774043bfe12d400451bc075c97f0075 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Wed, 22 Aug 2018 20:54:30 +0100 Subject: [PATCH 045/277] Fix typos Most of the typos were found by codespell. Signed-off-by: Radostin Stoyanov Signed-off-by: Andrei Vagin --- criu/img-remote.c | 10 +++++----- criu/include/img-remote.h | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/criu/img-remote.c b/criu/img-remote.c index 9805c74179..96528dfeed 100644 --- a/criu/img-remote.c +++ b/criu/img-remote.c @@ -30,18 +30,18 @@ // List of images already in memory. LIST_HEAD(rimg_head); -// List of local operations currently in-progess. +// List of local operations currently in-progress. 
LIST_HEAD(rop_inprogress); // List of local operations pending (reads on the restore side for images that // still haven't arrived). - LIST_HEAD(rop_pending); + // List of images waiting to be forwarded. The head of the list is currently // being forwarded. LIST_HEAD(rop_forwarding); -// List of snapshots (useful when doing incremental restores/dumps +// List of snapshots (useful when doing incremental restores/dumps) LIST_HEAD(snapshot_head); // Snapshot id (setup at launch time by dump or restore). @@ -369,7 +369,7 @@ static void rop_set_rimg(struct roperation* rop, struct rimage* rimg) rop->curr_recv_buf = list_entry(rimg->buf_head.prev, struct rbuf, l); } else { - // Writes or reads are simple. Just do it from the beginnig. + // Writes or reads are simple. Just do it from the beginning. rop->curr_recv_buf = list_entry(rimg->buf_head.next, struct rbuf, l); rop->curr_sent_buf = list_entry(rimg->buf_head.next, struct rbuf, l); rop->curr_sent_bytes = 0; @@ -540,7 +540,7 @@ void forward_remote_image(struct roperation* rop) return; } - pr_info("[fd=%d] Fowarding %s request for %s:%s (%" PRIu64 " bytes\n", + pr_info("[fd=%d] Forwarding %s request for %s:%s (%" PRIu64 " bytes\n", rop->fd, rop->flags == O_RDONLY ? "read" : rop->flags == O_APPEND ? "append" : "write", diff --git a/criu/include/img-remote.h b/criu/include/img-remote.h index ec4b15e7c3..bb70e81e5c 100644 --- a/criu/include/img-remote.h +++ b/criu/include/img-remote.h @@ -58,7 +58,7 @@ struct roperation { bool close_fd; /* Note: recv operation only. How much bytes should be received. */ uint64_t size; - /* Note: recv operation only. Buffer being writen. */ + /* Note: recv operation only. Buffer being written. */ struct rbuf *curr_recv_buf; // TODO - needed? Could be replaced by list.last! /* Note: send operation only. Pointer to buffer being sent. 
*/ struct rbuf *curr_sent_buf; @@ -150,7 +150,7 @@ char *get_snapshot_id_from_idx(int idx); */ int push_snapshot_id(); -/* Returns the snapshot id index that preceeds the current snapshot_id. */ +/* Returns the snapshot id index that precedes the current snapshot_id. */ int get_curr_parent_snapshot_id_idx(); #endif From 88c337ec46b01e087dcaca42176a45ce20d89031 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Tue, 29 Jan 2019 17:37:45 +0000 Subject: [PATCH 046/277] remote: Fix code indentation This patch does not introduce any functional changes. Signed-off-by: Radostin Stoyanov Signed-off-by: Andrei Vagin --- criu/img-remote.c | 71 +++++++++++++++++++---------------------------- 1 file changed, 28 insertions(+), 43 deletions(-) diff --git a/criu/img-remote.c b/criu/img-remote.c index 96528dfeed..5731fc1a86 100644 --- a/criu/img-remote.c +++ b/criu/img-remote.c @@ -359,16 +359,14 @@ static void rop_set_rimg(struct roperation* rop, struct rimage* rimg) if (rop->fd == proxy_to_cache_fd) { rop->curr_sent_buf = rimg->curr_fwd_buf; rop->curr_sent_bytes = rimg->curr_fwd_bytes; - } - // For local appends, just write at the end. - else { + } else { + // For local appends, just write at the end. rop->curr_sent_buf = list_entry(rimg->buf_head.prev, struct rbuf, l); rop->curr_sent_bytes = rop->curr_sent_buf->nbytes; } // On the receiver size, we just append rop->curr_recv_buf = list_entry(rimg->buf_head.prev, struct rbuf, l); - } - else { + } else { // Writes or reads are simple. Just do it from the beginning. 
rop->curr_recv_buf = list_entry(rimg->buf_head.next, struct rbuf, l); rop->curr_sent_buf = list_entry(rimg->buf_head.next, struct rbuf, l); @@ -405,8 +403,7 @@ struct roperation* handle_accept_write( pr_perror("Error preparing remote image"); goto err; } - } - else { + } else { list_del(&(rimg->l)); if (flags == O_APPEND) clear_remote_image(rimg); @@ -512,9 +509,8 @@ struct roperation* handle_accept_cache_read( } rop_set_rimg(rop, rimg); return rop; - } - // The file does not exist. - else if (finished_remote) { + } else if (finished_remote) { + // The file does not exist. pr_info("No image %s:%s.\n", path, snapshot_id); if (write_reply_header(cli_fd, ENOENT) < 0) pr_perror("Error writing reply header for unexisting image"); @@ -630,13 +626,11 @@ void handle_local_accept(int fd) // Write/Append case (only possible in img-proxy). if (flags != O_RDONLY) { rop = handle_accept_proxy_write(cli_fd, snapshot_id, path, flags); - } - // Read case while restoring (img-cache). - else if (restoring) { + } else if (restoring) { + // Read case while restoring (img-cache). rop = handle_accept_cache_read(cli_fd, snapshot_id, path, flags); - } - // Read case while dumping (img-proxy). - else { + } else { + // Read case while dumping (img-proxy). rop = handle_accept_proxy_read(cli_fd, snapshot_id, path, flags); } @@ -671,8 +665,8 @@ void finish_proxy_read(struct roperation* rop) forwarding = false; - // If there are images waiting to be forwarded, forward the next. - if (!list_empty(&rop_forwarding)) { + // If there are images waiting to be forwarded, forward the next. + if (!list_empty(&rop_forwarding)) { forward_remote_image(list_entry(rop_forwarding.next, struct roperation, l)); } } @@ -684,9 +678,8 @@ void finish_proxy_write(struct roperation* rop) if (!strncmp(rop->path, DUMP_FINISH, sizeof(DUMP_FINISH))) { // TODO - couldn't we handle the DUMP_FINISH in inside handle_accept_proxy_write? finish_local(); - } - // Normal image received, forward it. 
- else { + } else { + // Normal image received, forward it. struct roperation *rop_to_forward = new_remote_operation( rop->path, rop->snapshot_id, proxy_to_cache_fd, rop->flags, false); @@ -696,7 +689,7 @@ void finish_proxy_write(struct roperation* rop) rop_set_rimg(rop_to_forward, rop->rimg); if (list_empty(&rop_forwarding)) { forward_remote_image(rop_to_forward); - } + } list_add_tail(&(rop_to_forward->l), &rop_forwarding); } } @@ -773,14 +766,11 @@ void handle_roperation(struct epoll_event *event, struct roperation *rop) // Cached side (finished receiving forwarded image) if (restoring) { finish_cache_write(rop); - } - // Proxy side (finished receiving local image) - else { + } else { + // Proxy side (finished receiving local image) finish_proxy_write(rop); } - } - // If send operation if finished - else { + } else { // Proxy side (Finished forwarding image or reading it locally). if (!restoring) finish_proxy_read(rop); @@ -853,19 +843,17 @@ void accept_image_connections() { for (i = 0; i < n_events; i++) { // Accept from local dump/restore? 
if (events[i].data.ptr == &local_req_fd) { - if ( events[i].events & EPOLLHUP || + if (events[i].events & EPOLLHUP || events[i].events & EPOLLERR) { if (!finished_local) pr_perror("Unable to accept more local image connections"); goto end; } handle_local_accept(local_req_fd); - } - else if (restoring && !forwarding && events[i].data.ptr == &proxy_to_cache_fd) { + } else if (restoring && !forwarding && events[i].data.ptr == &proxy_to_cache_fd) { event_set(epoll_fd, EPOLL_CTL_DEL, proxy_to_cache_fd, 0, 0); handle_remote_accept(proxy_to_cache_fd); - } - else { + } else { struct roperation *rop = (struct roperation*)events[i].data.ptr; event_set(epoll_fd, EPOLL_CTL_DEL, rop->fd, 0, 0); @@ -973,22 +961,19 @@ int64_t send_image_async(struct roperation *op) list_entry(op->curr_sent_buf->l.next, struct rbuf, l); op->curr_sent_bytes = 0; return n; - } - else if (op->curr_sent_bytes == op->curr_sent_buf->nbytes) { + } else if (op->curr_sent_bytes == op->curr_sent_buf->nbytes) { if (close_fd) close(fd); return 0; } return n; - } - else if (errno == EPIPE || errno == ECONNRESET) { + } else if (errno == EPIPE || errno == ECONNRESET) { pr_warn("Connection for %s:%s was closed early than expected\n", rimg->path, rimg->snapshot_id); return 0; } else if (errno == EAGAIN || errno == EWOULDBLOCK) { return errno; - } - else { + } else { pr_perror("Write on %s:%s socket failed", rimg->path, rimg->snapshot_id); return -1; @@ -1016,9 +1001,9 @@ int read_remote_image_connection(char *snapshot_id, char *path) path, snapshot_id); return -1; } - if (!error || !strncmp(path, RESTORE_FINISH, sizeof(RESTORE_FINISH))) + if (!error || !strncmp(path, RESTORE_FINISH, sizeof(RESTORE_FINISH))) { return sockfd; - else if (error == ENOENT) { + } else if (error == ENOENT) { pr_info("Image does not exist (%s:%s)\n", path, snapshot_id); close(sockfd); return -ENOENT; @@ -1186,13 +1171,13 @@ int get_curr_snapshot_id_idx(void) pull_snapshot_ids(); list_for_each_entry(si, &snapshot_head, l) { - if 
(!strncmp(si->snapshot_id, snapshot_id, PATH_MAX)) + if (!strncmp(si->snapshot_id, snapshot_id, PATH_MAX)) return idx; idx++; } pr_err("Error, could not find current snapshot id (%s) fd\n", - snapshot_id); + snapshot_id); return -1; } From 96a823e2ebf6615d9ea01b297d848cb54a88fe23 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Tue, 29 Jan 2019 17:37:46 +0000 Subject: [PATCH 047/277] img-cache: Drop unnecessary accept_proxy_to_cache The function accept_proxy_to_cache() is a wrapper around accept(). Use accept() directly instead. Signed-off-by: Radostin Stoyanov Signed-off-by: Andrei Vagin --- criu/img-cache.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/criu/img-cache.c b/criu/img-cache.c index b93874c9f9..70c3a16632 100644 --- a/criu/img-cache.c +++ b/criu/img-cache.c @@ -9,20 +9,6 @@ #include "cr_options.h" #include "util.h" -int accept_proxy_to_cache(int sockfd) -{ - struct sockaddr_in cli_addr; - socklen_t clilen = sizeof(cli_addr); - int proxy_fd = accept(sockfd, (struct sockaddr *) &cli_addr, &clilen); - - if (proxy_fd < 0) { - pr_perror("Unable to accept remote image connection from image proxy"); - return -1; - } - - return proxy_fd; -} - int image_cache(bool background, char *local_cache_path, unsigned short cache_write_port) { pr_info("Proxy to Cache Port %d, CRIU to Cache Path %s\n", @@ -39,9 +25,12 @@ int image_cache(bool background, char *local_cache_path, unsigned short cache_wr return -1; } // Wait to accept connection from proxy. - proxy_to_cache_fd = accept_proxy_to_cache(proxy_to_cache_fd); - if (proxy_to_cache_fd < 0) + proxy_to_cache_fd = accept(proxy_to_cache_fd, NULL, 0); + if (proxy_to_cache_fd < 0) { + pr_perror("Unable to accept remote image connection" + " from image proxy"); return -1; // TODO - should close other sockets. 
+ } } pr_info("Cache is connected to Proxy through fd %d\n", proxy_to_cache_fd); From 66f2f3119d5d6fe16d0f88a31592df3470dcdb07 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Tue, 29 Jan 2019 17:37:47 +0000 Subject: [PATCH 048/277] img-cache/proxy: Close sockets on failure Signed-off-by: Radostin Stoyanov Signed-off-by: Andrei Vagin --- criu/img-cache.c | 13 +++++++++---- criu/img-proxy.c | 3 ++- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/criu/img-cache.c b/criu/img-cache.c index 70c3a16632..53da022ac7 100644 --- a/criu/img-cache.c +++ b/criu/img-cache.c @@ -11,6 +11,8 @@ int image_cache(bool background, char *local_cache_path, unsigned short cache_write_port) { + int tmp; + pr_info("Proxy to Cache Port %d, CRIU to Cache Path %s\n", cache_write_port, local_cache_path); restoring = true; @@ -25,12 +27,14 @@ int image_cache(bool background, char *local_cache_path, unsigned short cache_wr return -1; } // Wait to accept connection from proxy. - proxy_to_cache_fd = accept(proxy_to_cache_fd, NULL, 0); - if (proxy_to_cache_fd < 0) { + tmp = accept(proxy_to_cache_fd, NULL, 0); + if (tmp < 0) { pr_perror("Unable to accept remote image connection" " from image proxy"); - return -1; // TODO - should close other sockets. + close(proxy_to_cache_fd); + return -1; } + proxy_to_cache_fd = tmp; } pr_info("Cache is connected to Proxy through fd %d\n", proxy_to_cache_fd); @@ -38,7 +42,8 @@ int image_cache(bool background, char *local_cache_path, unsigned short cache_wr local_req_fd = setup_UNIX_server_socket(local_cache_path); if (local_req_fd < 0) { pr_perror("Unable to open cache to proxy UNIX socket"); - return -1; // TODO - should close other sockets. 
+ close(proxy_to_cache_fd); + return -1; } diff --git a/criu/img-proxy.c b/criu/img-proxy.c index 65490d29f1..73a7263054 100644 --- a/criu/img-proxy.c +++ b/criu/img-proxy.c @@ -27,7 +27,8 @@ int image_proxy(bool background, char *local_proxy_path, char *fwd_host, unsigne proxy_to_cache_fd = setup_tcp_client(fwd_host, fwd_port); if (proxy_to_cache_fd < 0) { pr_perror("Unable to open proxy to cache TCP socket"); - return -1; // TODO - should close other sockets. + close(local_req_fd); + return -1; } } From d2c44c353612045a431c69ab7a8f1dff28a30eb1 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Wed, 30 Jan 2019 00:07:59 +0000 Subject: [PATCH 049/277] remote: Drop unused PB_LOCAL_IMAGE_SIZE Signed-off-by: Radostin Stoyanov --- criu/img-remote.c | 1 - 1 file changed, 1 deletion(-) diff --git a/criu/img-remote.c b/criu/img-remote.c index 5731fc1a86..12e0c7f98c 100644 --- a/criu/img-remote.c +++ b/criu/img-remote.c @@ -24,7 +24,6 @@ #include "protobuf.h" #include "image.h" -#define PB_LOCAL_IMAGE_SIZE PATH_MAX #define EPOLL_MAX_EVENTS 50 // List of images already in memory. From a539277b107ed08c1803ab1071142b8d0457945d Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Fri, 8 Feb 2019 22:01:33 +0000 Subject: [PATCH 050/277] remote: Introduce strflags() macro Reduce code duplication by introducing a strflags() macro which maps a flag to corresponding string. Signed-off-by: Radostin Stoyanov --- criu/img-remote.c | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/criu/img-remote.c b/criu/img-remote.c index 12e0c7f98c..045847a271 100644 --- a/criu/img-remote.c +++ b/criu/img-remote.c @@ -26,6 +26,9 @@ #define EPOLL_MAX_EVENTS 50 +#define strflags(f) ((f) == O_RDONLY ? "read" : \ + (f) == O_APPEND ? "append" : "write") + // List of images already in memory. 
LIST_HEAD(rimg_head); @@ -536,11 +539,8 @@ void forward_remote_image(struct roperation* rop) } pr_info("[fd=%d] Forwarding %s request for %s:%s (%" PRIu64 " bytes\n", - rop->fd, - rop->flags == O_RDONLY ? "read" : - rop->flags == O_APPEND ? "append" : "write", - rop->path, rop->snapshot_id, rop->size); - + rop->fd, strflags(rop->flags), rop->path, rop->snapshot_id, + rop->size); // Go back to non-blocking socket_set_non_blocking(rop->fd); @@ -577,10 +577,7 @@ void handle_remote_accept(int fd) socket_set_non_blocking(fd); pr_info("[fd=%d] Received %s request for %s:%s with %" PRIu64 " bytes\n", - fd, - flags == O_RDONLY ? "read" : - flags == O_APPEND ? "append" : "write", - path, snapshot_id, size); + fd, strflags(flags), path, snapshot_id, size); forwarding = true; @@ -617,10 +614,7 @@ void handle_local_accept(int fd) } pr_info("[fd=%d] Received %s request for %s:%s\n", - cli_fd, - flags == O_RDONLY ? "read" : - flags == O_APPEND ? "append" : "write", - path, snapshot_id); + cli_fd, strflags(flags), path, snapshot_id); // Write/Append case (only possible in img-proxy). if (flags != O_RDONLY) { @@ -706,10 +700,7 @@ void finish_cache_write(struct roperation* rop) if (prop != NULL) { pr_info("\t[fd=%d] Resuming pending %s for %s:%s\n", - prop->fd, - prop->flags == O_APPEND ? - "append" : prop->flags == O_RDONLY ? - "read" : "write", + prop->fd, strflags(prop->flags), prop->snapshot_id, prop->path); // Write header for pending image. 
From 7223ac710b359b829b6d6dfbc5fa6a072e2bb783 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Fri, 8 Feb 2019 22:51:48 +0000 Subject: [PATCH 051/277] remote: Fix stringop-truncation warning Signed-off-by: Radostin Stoyanov --- criu/img-remote.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/criu/img-remote.c b/criu/img-remote.c index 045847a271..e6924f15f6 100644 --- a/criu/img-remote.c +++ b/criu/img-remote.c @@ -298,8 +298,8 @@ int64_t read_remote_header(int fd, char *snapshot_id, char *path, int *flags, ui int ret = pb_read_obj(fd, (void **)&ri, PB_REMOTE_IMAGE); if (ret > 0) { - strncpy(snapshot_id, ri->snapshot_id, PATH_MAX); - strncpy(path, ri->name, PATH_MAX); + strncpy(snapshot_id, ri->snapshot_id, PATH_MAX - 1); + strncpy(path, ri->name, PATH_MAX - 1); *flags = ri->open_mode; *size = ri->size; } From 0a58954e5189145dbe17376d1dc1ff357e54020d Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Fri, 8 Feb 2019 22:57:08 +0000 Subject: [PATCH 052/277] remote: Fix 'flags' maybe uninitialized warning Signed-off-by: Radostin Stoyanov --- criu/img-remote.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/img-remote.c b/criu/img-remote.c index e6924f15f6..f198ffaefd 100644 --- a/criu/img-remote.c +++ b/criu/img-remote.c @@ -553,7 +553,7 @@ void handle_remote_accept(int fd) { char path[PATH_MAX]; char snapshot_id[PATH_MAX]; - int flags; + int flags = 0; uint64_t size = 0; int64_t ret; struct roperation* rop = NULL; From 3058b3fb82255d024aed8cb23917183ba9789f74 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Fri, 8 Feb 2019 23:43:05 +0000 Subject: [PATCH 053/277] remote: Make private functions static This patch makes various private variables and procedures static. These changes conform to the following code style conventions: When declaring pointer data or a function that returns a pointer type, the preferred use of ``*`` is adjacent to the data name or function name and not adjacent to the type name. 
Statements longer than 80 columns will be broken into sensible chunks, unless exceeding 80 columns significantly increases readability and does not hide information. Descendants are always substantially shorter than the parent and are placed substantially to the right. The same applies to function headers with a long argument list. from https://www.kernel.org/doc/Documentation/process/coding-style.rst The function declarations {send,recv}_image() from img-remote.h are removed because they do not have a corresponding implementation. The following functions are made static because they are not used outside img-remote.c: * {send,recv}_image_async() * {read,write}_remote_header() * socket_set_non_blocking() Signed-off-by: Radostin Stoyanov --- criu/img-remote.c | 104 ++++++++++++++++++++------------------ criu/include/img-remote.h | 9 ---- 2 files changed, 55 insertions(+), 58 deletions(-) diff --git a/criu/img-remote.c b/criu/img-remote.c index f198ffaefd..090e1e775f 100644 --- a/criu/img-remote.c +++ b/criu/img-remote.c @@ -30,21 +30,21 @@ (f) == O_APPEND ? "append" : "write") // List of images already in memory. -LIST_HEAD(rimg_head); +static LIST_HEAD(rimg_head); // List of local operations currently in-progress. -LIST_HEAD(rop_inprogress); +static LIST_HEAD(rop_inprogress); // List of local operations pending (reads on the restore side for images that // still haven't arrived). -LIST_HEAD(rop_pending); +static LIST_HEAD(rop_pending); // List of images waiting to be forwarded. The head of the list is currently // being forwarded. -LIST_HEAD(rop_forwarding); +static LIST_HEAD(rop_forwarding); // List of snapshots (useful when doing incremental restores/dumps) -LIST_HEAD(snapshot_head); +static LIST_HEAD(snapshot_head); // Snapshot id (setup at launch time by dump or restore). static char *snapshot_id; @@ -53,22 +53,24 @@ static char *snapshot_id; bool restoring = true; // True if the proxy to cache socket is being used (receiving or sending). 
-bool forwarding = false; +static bool forwarding = false; // True if the local dump or restore is finished. -bool finished_local = false; +static bool finished_local = false; // True if the communication between the proxy and cache can be closed. -bool finished_remote = false; +static bool finished_remote = false; // Proxy to cache socket fd; Local dump or restore servicing fd. int proxy_to_cache_fd; int local_req_fd; // Epoll fd and event array. -int epoll_fd; -struct epoll_event *events; +static int epoll_fd; +static struct epoll_event *events; +static int64_t recv_image_async(struct roperation *op); +static int64_t send_image_async(struct roperation *op); /* A snapshot is a dump or pre-dump operation. Each snapshot is identified by an * ID which corresponds to the working directory specified by the user. @@ -78,7 +80,7 @@ struct snapshot { struct list_head l; }; -struct snapshot *new_snapshot(char *snapshot_id) +static struct snapshot *new_snapshot(char *snapshot_id) { struct snapshot *s = xmalloc(sizeof(struct snapshot)); @@ -90,7 +92,7 @@ struct snapshot *new_snapshot(char *snapshot_id) return s; } -void add_snapshot(struct snapshot *snapshot) +static inline void add_snapshot(struct snapshot *snapshot) { list_add_tail(&(snapshot->l), &snapshot_head); } @@ -108,8 +110,8 @@ struct rimage *get_rimg_by_name(const char *snapshot_id, const char *path) return NULL; } -struct roperation *get_rop_by_name( - struct list_head *head, const char *snapshot_id, const char *path) +static inline struct roperation *get_rop_by_name(struct list_head *head, + const char *snapshot_id, const char *path) { struct roperation *rop = NULL; @@ -122,7 +124,7 @@ struct roperation *get_rop_by_name( return NULL; } -int event_set(int epoll_fd, int op, int fd, uint32_t events, void *data) +static int event_set(int epoll_fd, int op, int fd, uint32_t events, void *data) { int ret; struct epoll_event event; @@ -135,7 +137,7 @@ int event_set(int epoll_fd, int op, int fd, uint32_t events, void 
*data) return ret; } -void socket_set_non_blocking(int fd) +static inline void socket_set_non_blocking(int fd) { int flags = fcntl(fd, F_GETFL, NULL); @@ -149,7 +151,7 @@ void socket_set_non_blocking(int fd) pr_perror("Failed to set flags for fd %d", fd); } -void socket_set_blocking(int fd) +static inline void socket_set_blocking(int fd) { int flags = fcntl(fd, F_GETFL, NULL); @@ -218,7 +220,7 @@ static int setup_UNIX_client_socket(char *path) return sockfd; } -static int64_t pb_write_obj(int fd, void *obj, int type) +static inline int64_t pb_write_obj(int fd, void *obj, int type) { struct cr_img img; @@ -227,7 +229,7 @@ static int64_t pb_write_obj(int fd, void *obj, int type) return pb_write_one(&img, obj, type); } -static int64_t pb_read_obj(int fd, void **pobj, int type) +static inline int64_t pb_read_obj(int fd, void **pobj, int type) { struct cr_img img; @@ -236,7 +238,8 @@ static int64_t pb_read_obj(int fd, void **pobj, int type) return do_pb_read_one(&img, pobj, type, true); } -static int64_t write_header(int fd, char *snapshot_id, char *path, int flags) +static inline int64_t write_header(int fd, char *snapshot_id, char *path, + int flags) { LocalImageEntry li = LOCAL_IMAGE_ENTRY__INIT; @@ -246,7 +249,7 @@ static int64_t write_header(int fd, char *snapshot_id, char *path, int flags) return pb_write_obj(fd, &li, PB_LOCAL_IMAGE); } -static int64_t write_reply_header(int fd, int error) +static inline int64_t write_reply_header(int fd, int error) { LocalImageReplyEntry lir = LOCAL_IMAGE_REPLY_ENTRY__INIT; @@ -254,7 +257,8 @@ static int64_t write_reply_header(int fd, int error) return pb_write_obj(fd, &lir, PB_LOCAL_IMAGE_REPLY); } -int64_t write_remote_header(int fd, char *snapshot_id, char *path, int flags, uint64_t size) +static inline int64_t write_remote_header(int fd, char *snapshot_id, + char *path, int flags, uint64_t size) { RemoteImageEntry ri = REMOTE_IMAGE_ENTRY__INIT; @@ -265,7 +269,8 @@ int64_t write_remote_header(int fd, char *snapshot_id, char 
*path, int flags, ui return pb_write_obj(fd, &ri, PB_REMOTE_IMAGE); } -static int64_t read_header(int fd, char *snapshot_id, char *path, int *flags) +static inline int64_t read_header(int fd, char *snapshot_id, char *path, + int *flags) { LocalImageEntry *li; int ret = pb_read_obj(fd, (void **)&li, PB_LOCAL_IMAGE); @@ -281,7 +286,7 @@ static int64_t read_header(int fd, char *snapshot_id, char *path, int *flags) return ret; } -static int64_t read_reply_header(int fd, int *error) +static inline int64_t read_reply_header(int fd, int *error) { LocalImageReplyEntry *lir; int ret = pb_read_obj(fd, (void **)&lir, PB_LOCAL_IMAGE_REPLY); @@ -292,7 +297,8 @@ static int64_t read_reply_header(int fd, int *error) return ret; } -int64_t read_remote_header(int fd, char *snapshot_id, char *path, int *flags, uint64_t *size) +static inline int64_t read_remote_header(int fd, char *snapshot_id, char *path, + int *flags, uint64_t *size) { RemoteImageEntry *ri; int ret = pb_read_obj(fd, (void **)&ri, PB_REMOTE_IMAGE); @@ -332,8 +338,8 @@ static struct rimage *new_remote_image(char *path, char *snapshot_id) return NULL; } -static struct roperation *new_remote_operation( - char *path, char *snapshot_id, int cli_fd, int flags, bool close_fd) +static struct roperation *new_remote_operation(char *path, + char *snapshot_id, int cli_fd, int flags, bool close_fd) { struct roperation *rop = calloc(1, sizeof(struct roperation)); @@ -352,7 +358,7 @@ static struct roperation *new_remote_operation( return rop; } -static void rop_set_rimg(struct roperation* rop, struct rimage* rimg) +static inline void rop_set_rimg(struct roperation *rop, struct rimage *rimg) { rop->rimg = rimg; rop->size = rimg->size; @@ -373,12 +379,11 @@ static void rop_set_rimg(struct roperation* rop, struct rimage* rimg) rop->curr_recv_buf = list_entry(rimg->buf_head.next, struct rbuf, l); rop->curr_sent_buf = list_entry(rimg->buf_head.next, struct rbuf, l); rop->curr_sent_bytes = 0; - } } /* Clears a remote image struct for 
reusing it. */ -static struct rimage *clear_remote_image(struct rimage *rimg) +static inline struct rimage *clear_remote_image(struct rimage *rimg) { while (!list_is_singular(&(rimg->buf_head))) { struct rbuf *buf = list_entry(rimg->buf_head.prev, struct rbuf, l); @@ -393,8 +398,8 @@ static struct rimage *clear_remote_image(struct rimage *rimg) return rimg; } -struct roperation* handle_accept_write( - int cli_fd, char* snapshot_id, char* path, int flags, bool close_fd, uint64_t size) +static struct roperation *handle_accept_write(int cli_fd, char *snapshot_id, + char *path, int flags, bool close_fd, uint64_t size) { struct roperation *rop = NULL; struct rimage *rimg = get_rimg_by_name(snapshot_id, path); @@ -426,14 +431,14 @@ struct roperation* handle_accept_write( return NULL; } -struct roperation* handle_accept_proxy_write( - int cli_fd, char* snapshot_id, char* path, int flags) +static inline struct roperation *handle_accept_proxy_write(int cli_fd, + char *snapshot_id, char *path, int flags) { return handle_accept_write(cli_fd, snapshot_id, path, flags, true, 0); } -struct roperation* handle_accept_proxy_read( - int cli_fd, char* snapshot_id, char* path, int flags) +static struct roperation *handle_accept_proxy_read(int cli_fd, + char *snapshot_id, char *path, int flags) { struct roperation *rop = NULL; struct rimage *rimg = NULL; @@ -470,7 +475,7 @@ struct roperation* handle_accept_proxy_read( return NULL; } -void finish_local() +static inline void finish_local() { int ret; finished_local = true; @@ -480,8 +485,8 @@ void finish_local() } } -struct roperation* handle_accept_cache_read( - int cli_fd, char* snapshot_id, char* path, int flags) +static struct roperation *handle_accept_cache_read(int cli_fd, + char *snapshot_id, char *path, int flags) { struct rimage *rimg = NULL; struct roperation *rop = NULL; @@ -522,7 +527,7 @@ struct roperation* handle_accept_cache_read( return NULL; } -void forward_remote_image(struct roperation* rop) +static void 
forward_remote_image(struct roperation *rop) { int64_t ret = 0; @@ -549,7 +554,7 @@ void forward_remote_image(struct roperation* rop) event_set(epoll_fd, EPOLL_CTL_ADD, rop->fd, EPOLLOUT, rop); } -void handle_remote_accept(int fd) +static void handle_remote_accept(int fd) { char path[PATH_MAX]; char snapshot_id[PATH_MAX]; @@ -592,7 +597,7 @@ void handle_remote_accept(int fd) close(fd); } -void handle_local_accept(int fd) +static void handle_local_accept(int fd) { int cli_fd; char path[PATH_MAX]; @@ -648,7 +653,7 @@ void handle_local_accept(int fd) close(cli_fd); } -void finish_proxy_read(struct roperation* rop) +static inline void finish_proxy_read(struct roperation *rop) { // If finished forwarding image if (rop->fd == proxy_to_cache_fd) { @@ -665,7 +670,7 @@ void finish_proxy_read(struct roperation* rop) } } -void finish_proxy_write(struct roperation* rop) +static inline void finish_proxy_write(struct roperation *rop) { // No more local images are comming. Close local socket. if (!strncmp(rop->path, DUMP_FINISH, sizeof(DUMP_FINISH))) { @@ -687,7 +692,7 @@ void finish_proxy_write(struct roperation* rop) } } -void finish_cache_write(struct roperation* rop) +static void finish_cache_write(struct roperation *rop) { struct roperation *prop = get_rop_by_name( &rop_pending, rop->snapshot_id, rop->path); @@ -719,7 +724,8 @@ void finish_cache_write(struct roperation* rop) } } -void handle_roperation(struct epoll_event *event, struct roperation *rop) +static void handle_roperation(struct epoll_event *event, + struct roperation *rop) { int64_t ret = (EPOLLOUT & event->events) ? send_image_async(rop) : @@ -770,7 +776,7 @@ void handle_roperation(struct epoll_event *event, struct roperation *rop) free(rop); } -void check_pending() +static void check_pending() { struct roperation *rop = NULL; struct rimage *rimg = NULL; @@ -881,7 +887,7 @@ void accept_image_connections() { /* Note: size is a limit on how much we want to read from the socket. 
Zero means * read until the socket is closed. */ -int64_t recv_image_async(struct roperation *op) +static int64_t recv_image_async(struct roperation *op) { int fd = op->fd; struct rimage *rimg = op->rimg; @@ -932,7 +938,7 @@ int64_t recv_image_async(struct roperation *op) return n; } -int64_t send_image_async(struct roperation *op) +static int64_t send_image_async(struct roperation *op) { int fd = op->fd; struct rimage *rimg = op->rimg; diff --git a/criu/include/img-remote.h b/criu/include/img-remote.h index bb70e81e5c..38a03deab3 100644 --- a/criu/include/img-remote.h +++ b/criu/include/img-remote.h @@ -76,16 +76,7 @@ extern bool restoring; void accept_image_connections(); struct rimage *get_rimg_by_name(const char *snapshot_id, const char *path); -int64_t send_image(int fd, struct rimage *rimg, int flags, bool image_check); -int64_t send_image_async(struct roperation *op); -int64_t recv_image(int fd, struct rimage *rimg, uint64_t size, int flags, bool image_check); -int64_t recv_image_async(struct roperation *op); - -int64_t read_remote_header(int fd, char *snapshot_id, char *path, int *open_mode, uint64_t *size); -int64_t write_remote_header(int fd, char *snapshot_id, char *path, int open_mode, uint64_t size); - int setup_UNIX_server_socket(char *path); -void socket_set_non_blocking(int fd); /* Called by restore to get the fd correspondent to a particular path. This call * will block until the connection is received. From d2d254c5cf4aced6aba2737c40f7f6aecbde9aeb Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sat, 9 Feb 2019 00:35:39 +0000 Subject: [PATCH 054/277] remote: Use xmalloc/xzalloc/xfree There is no need to print an error message, __xalloc() does that. 
Signed-off-by: Radostin Stoyanov --- criu/img-remote.c | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/criu/img-remote.c b/criu/img-remote.c index 090e1e775f..74c5261bf2 100644 --- a/criu/img-remote.c +++ b/criu/img-remote.c @@ -315,13 +315,11 @@ static inline int64_t read_remote_header(int fd, char *snapshot_id, char *path, static struct rimage *new_remote_image(char *path, char *snapshot_id) { - struct rimage *rimg = calloc(1, sizeof(struct rimage)); - struct rbuf *buf = calloc(1, sizeof(struct rbuf)); + struct rimage *rimg = xzalloc(sizeof(struct rimage)); + struct rbuf *buf = xzalloc(sizeof(struct rbuf)); - if (rimg == NULL || buf == NULL) { - pr_perror("Unable to allocate remote image structures"); + if (rimg == NULL || buf == NULL) goto err; - } strncpy(rimg->path, path, PATH_MAX -1 ); strncpy(rimg->snapshot_id, snapshot_id, PATH_MAX - 1); @@ -333,20 +331,19 @@ static struct rimage *new_remote_image(char *path, char *snapshot_id) return rimg; err: - free(rimg); - free(buf); + xfree(rimg); + xfree(buf); return NULL; } static struct roperation *new_remote_operation(char *path, char *snapshot_id, int cli_fd, int flags, bool close_fd) { - struct roperation *rop = calloc(1, sizeof(struct roperation)); + struct roperation *rop = xzalloc(sizeof(struct roperation)); - if (rop == NULL) { - pr_perror("Unable to allocate remote operation structures"); + if (rop == NULL) return NULL; - } + strncpy(rop->path, path, PATH_MAX -1 ); strncpy(rop->snapshot_id, snapshot_id, PATH_MAX - 1); rop->path[PATH_MAX - 1] = '\0'; @@ -389,7 +386,7 @@ static inline struct rimage *clear_remote_image(struct rimage *rimg) struct rbuf *buf = list_entry(rimg->buf_head.prev, struct rbuf, l); list_del(rimg->buf_head.prev); - free(buf); + xfree(buf); } list_entry(rimg->buf_head.next, struct rbuf, l)->nbytes = 0; @@ -426,8 +423,8 @@ static struct roperation *handle_accept_write(int cli_fd, char *snapshot_id, rop->size = size; return rop; err: 
- free(rimg); - free(rop); + xfree(rimg); + xfree(rop); return NULL; } @@ -512,7 +509,7 @@ static struct roperation *handle_accept_cache_read(int cli_fd, pr_perror("Error writing reply header for %s:%s", path, snapshot_id); close(rop->fd); - free(rop); + xfree(rop); } rop_set_rimg(rop, rimg); return rop; @@ -522,7 +519,7 @@ static struct roperation *handle_accept_cache_read(int cli_fd, if (write_reply_header(cli_fd, ENOENT) < 0) pr_perror("Error writing reply header for unexisting image"); close(cli_fd); - free(rop); + xfree(rop); } return NULL; } @@ -713,7 +710,7 @@ static void finish_cache_write(struct roperation *rop) pr_perror("Error writing reply header for %s:%s", prop->path, prop->snapshot_id); close(prop->fd); - free(prop); + xfree(prop); return; } @@ -773,7 +770,7 @@ static void handle_roperation(struct epoll_event *event, // Nothing to be done when a read is finished on the cache side. } err: - free(rop); + xfree(rop); } static void check_pending() @@ -909,9 +906,8 @@ static int64_t recv_image_async(struct roperation *op) curr_buf->nbytes += n; rimg->size += n; if (curr_buf->nbytes == BUF_SIZE) { - struct rbuf *buf = malloc(sizeof(struct rbuf)); + struct rbuf *buf = xmalloc(sizeof(struct rbuf)); if (buf == NULL) { - pr_perror("Unable to allocate remote_buffer structures"); if (close_fd) close(fd); return -1; From 7f63b26483e1859acc20ed29cb288a819d2ebbc1 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Mon, 11 Feb 2019 08:39:05 +0000 Subject: [PATCH 055/277] remote: Ignore interrupt signals When an interrupt signal (even SIGWINCH when strace is used) is received while epoll_wait() sleeps, it will return a value of -1 and set errno to EINTR, which is not an error and should be ignored.
Signed-off-by: Radostin Stoyanov --- criu/img-remote.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/criu/img-remote.c b/criu/img-remote.c index 74c5261bf2..13445b9833 100644 --- a/criu/img-remote.c +++ b/criu/img-remote.c @@ -828,7 +828,9 @@ void accept_image_connections() { int n_events, i; n_events = epoll_wait(epoll_fd, events, EPOLL_MAX_EVENTS, 250); - if (n_events < 0) { + + /* epoll_wait isn't restarted after interrupted by a signal */ + if (n_events < 0 && errno != EINTR) { pr_perror("Failed to epoll wait"); goto end; } From f428d3037af96aa28e8897972be9c913c04e5780 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Wed, 13 Feb 2019 13:03:58 +0000 Subject: [PATCH 056/277] util: Don't pass address/port as arguments There is no need to pass the values for address and port as arguments when creating a TCP server. The external `opts` object, which provides opts.addr and opts.port, is accessible in all components that require these values. With this change, a value specified with the `--address` option will be used by image-cache in the same way as with page-server.
Example: criu image-cache --address 127.0.0.1 --port 1234 criu page-server --address 127.0.0.1 --port 1234 Signed-off-by: Radostin Stoyanov --- criu/crtools.c | 4 ++-- criu/img-cache.c | 8 ++++---- criu/img-proxy.c | 8 ++++---- criu/include/img-remote.h | 4 ++-- criu/include/util.h | 4 ++-- criu/page-xfer.c | 4 ++-- criu/util.c | 35 +++++++++++++++++------------------ 7 files changed, 33 insertions(+), 34 deletions(-) diff --git a/criu/crtools.c b/criu/crtools.c index 996ec2df1f..97a6d6d6c3 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -232,7 +232,7 @@ int main(int argc, char *argv[], char *envp[]) if (!strcmp(argv[optind], "image-cache")) { if (!opts.port) goto opt_port_missing; - return image_cache(opts.daemon_mode, DEFAULT_CACHE_SOCKET, opts.port); + return image_cache(opts.daemon_mode, DEFAULT_CACHE_SOCKET); } if (!strcmp(argv[optind], "image-proxy")) { @@ -242,7 +242,7 @@ int main(int argc, char *argv[], char *envp[]) } if (!opts.port) goto opt_port_missing; - return image_proxy(opts.daemon_mode, DEFAULT_PROXY_SOCKET, opts.addr, opts.port); + return image_proxy(opts.daemon_mode, DEFAULT_PROXY_SOCKET); } if (!strcmp(argv[optind], "service")) diff --git a/criu/img-cache.c b/criu/img-cache.c index 53da022ac7..4c1e6ebe7f 100644 --- a/criu/img-cache.c +++ b/criu/img-cache.c @@ -9,19 +9,19 @@ #include "cr_options.h" #include "util.h" -int image_cache(bool background, char *local_cache_path, unsigned short cache_write_port) +int image_cache(bool background, char *local_cache_path) { int tmp; - pr_info("Proxy to Cache Port %d, CRIU to Cache Path %s\n", - cache_write_port, local_cache_path); + pr_info("Proxy to Cache Port %u, CRIU to Cache Path %s\n", + opts.port, local_cache_path); restoring = true; if (opts.ps_socket != -1) { proxy_to_cache_fd = opts.ps_socket; pr_info("Re-using ps socket %d\n", proxy_to_cache_fd); } else { - proxy_to_cache_fd = setup_tcp_server("image cache", NULL, &cache_write_port); + proxy_to_cache_fd = setup_tcp_server("image cache"); if 
(proxy_to_cache_fd < 0) { pr_perror("Unable to open proxy to cache TCP socket"); return -1; diff --git a/criu/img-proxy.c b/criu/img-proxy.c index 73a7263054..b11ab83e0c 100644 --- a/criu/img-proxy.c +++ b/criu/img-proxy.c @@ -8,10 +8,10 @@ #include "cr_options.h" #include "util.h" -int image_proxy(bool background, char *local_proxy_path, char *fwd_host, unsigned short fwd_port) +int image_proxy(bool background, char *local_proxy_path) { - pr_info("CRIU to Proxy Path: %s, Cache Address %s:%hu\n", - local_proxy_path, fwd_host, fwd_port); + pr_info("CRIU to Proxy Path: %s, Cache Address %s:%u\n", + local_proxy_path, opts.addr, opts.port); restoring = false; local_req_fd = setup_UNIX_server_socket(local_proxy_path); @@ -24,7 +24,7 @@ int image_proxy(bool background, char *local_proxy_path, char *fwd_host, unsigne proxy_to_cache_fd = opts.ps_socket; pr_info("Re-using ps socket %d\n", proxy_to_cache_fd); } else { - proxy_to_cache_fd = setup_tcp_client(fwd_host, fwd_port); + proxy_to_cache_fd = setup_tcp_client(); if (proxy_to_cache_fd < 0) { pr_perror("Unable to open proxy to cache TCP socket"); close(local_req_fd); diff --git a/criu/include/img-remote.h b/criu/include/img-remote.h index 38a03deab3..e853d3552b 100644 --- a/criu/include/img-remote.h +++ b/criu/include/img-remote.h @@ -98,13 +98,13 @@ int finish_remote_restore(); /* Starts an image proxy daemon (dump side). It receives image files through * socket connections and forwards them to the image cache (restore side). */ -int image_proxy(bool background, char *local_proxy_path, char *cache_host, unsigned short cache_port); +int image_proxy(bool background, char *local_proxy_path); /* Starts an image cache daemon (restore side). It receives image files through * socket connections and caches them until they are requested by the restore * process. 
*/ -int image_cache(bool background, char *local_cache_path, unsigned short cache_port); +int image_cache(bool background, char *local_cache_path); /* Reads (discards) 'len' bytes from fd. This is used to emulate the function * lseek, which is used to advance the file needle. diff --git a/criu/include/util.h b/criu/include/util.h index 0fa03c7d8c..fe6ee4427a 100644 --- a/criu/include/util.h +++ b/criu/include/util.h @@ -292,9 +292,9 @@ char *xsprintf(const char *fmt, ...) void print_data(unsigned long addr, unsigned char *data, size_t size); -int setup_tcp_server(char *type, char *addr, unsigned short *port); +int setup_tcp_server(char *type); int run_tcp_server(bool daemon_mode, int *ask, int cfd, int sk); -int setup_tcp_client(char *hostname, unsigned short port); +int setup_tcp_client(void); #define LAST_PID_PATH "sys/kernel/ns_last_pid" #define PID_MAX_PATH "sys/kernel/pid_max" diff --git a/criu/page-xfer.c b/criu/page-xfer.c index 67abbf681d..fe457d2017 100644 --- a/criu/page-xfer.c +++ b/criu/page-xfer.c @@ -1052,7 +1052,7 @@ int cr_page_server(bool daemon_mode, bool lazy_dump, int cfd) goto no_server; } - sk = setup_tcp_server("page", opts.addr, &opts.port); + sk = setup_tcp_server("page"); if (sk == -1) return -1; no_server: @@ -1098,7 +1098,7 @@ static int connect_to_page_server(void) goto out; } - page_server_sk = setup_tcp_client(opts.addr, opts.port); + page_server_sk = setup_tcp_client(); if (page_server_sk == -1) return -1; diff --git a/criu/util.c b/criu/util.c index 7a31828b96..b08d08d1c2 100644 --- a/criu/util.c +++ b/criu/util.c @@ -1073,8 +1073,7 @@ void print_data(unsigned long addr, unsigned char *data, size_t size) } } -static int get_sockaddr_in(struct sockaddr_storage *addr, char *host, - unsigned short port) +static int get_sockaddr_in(struct sockaddr_storage *addr, char *host) { memset(addr, 0, sizeof(*addr)); @@ -1092,26 +1091,26 @@ static int get_sockaddr_in(struct sockaddr_storage *addr, char *host, } if (addr->ss_family == AF_INET6) { 
- ((struct sockaddr_in6 *)addr)->sin6_port = htons(port); + ((struct sockaddr_in6 *)addr)->sin6_port = htons(opts.port); } else if (addr->ss_family == AF_INET) { - ((struct sockaddr_in *)addr)->sin_port = htons(port); + ((struct sockaddr_in *)addr)->sin_port = htons(opts.port); } return 0; } -int setup_tcp_server(char *type, char *addr, unsigned short *port) +int setup_tcp_server(char *type) { int sk = -1; int sockopt = 1; struct sockaddr_storage saddr; socklen_t slen = sizeof(saddr); - if (get_sockaddr_in(&saddr, addr, (*port))) { + if (get_sockaddr_in(&saddr, opts.addr)) { return -1; } - pr_info("Starting %s server on port %u\n", type, *port); + pr_info("Starting %s server on port %u\n", type, opts.port); sk = socket(saddr.ss_family, SOCK_STREAM, IPPROTO_TCP); @@ -1137,19 +1136,19 @@ int setup_tcp_server(char *type, char *addr, unsigned short *port) } /* Get socket port in case of autobind */ - if ((*port) == 0) { + if (opts.port == 0) { if (getsockname(sk, (struct sockaddr *)&saddr, &slen)) { pr_perror("Can't get %s server name", type); goto out; } if (saddr.ss_family == AF_INET6) { - (*port) = ntohs(((struct sockaddr_in *)&saddr)->sin_port); + opts.port = ntohs(((struct sockaddr_in *)&saddr)->sin_port); } else if (saddr.ss_family == AF_INET) { - (*port) = ntohs(((struct sockaddr_in6 *)&saddr)->sin6_port); + opts.port = ntohs(((struct sockaddr_in6 *)&saddr)->sin6_port); } - pr_info("Using %u port\n", (*port)); + pr_info("Using %u port\n", opts.port); } return sk; @@ -1206,7 +1205,7 @@ int run_tcp_server(bool daemon_mode, int *ask, int cfd, int sk) return -1; } -int setup_tcp_client(char *hostname, unsigned short port) +int setup_tcp_client(void) { struct sockaddr_storage saddr; struct addrinfo addr_criteria, *addr_list, *p; @@ -1221,10 +1220,10 @@ int setup_tcp_client(char *hostname, unsigned short port) /* * addr_list contains a list of addrinfo structures that corresponding - * to the criteria specified in hostname and addr_criteria. 
+ * to the criteria specified in opts.addr and addr_criteria. */ - if (getaddrinfo(hostname, NULL, &addr_criteria, &addr_list)) { - pr_perror("Failed to resolve hostname: %s", hostname); + if (getaddrinfo(opts.addr, NULL, &addr_criteria, &addr_list)) { + pr_perror("Failed to resolve hostname: %s", opts.addr); goto out; } @@ -1243,9 +1242,9 @@ int setup_tcp_client(char *hostname, unsigned short port) } inet_ntop(p->ai_family, ip, ipstr, sizeof(ipstr)); - pr_info("Connecting to server %s:%u\n", ipstr, port); + pr_info("Connecting to server %s:%u\n", ipstr, opts.port); - if (get_sockaddr_in(&saddr, ipstr, port)) + if (get_sockaddr_in(&saddr, ipstr)) goto out; sk = socket(saddr.ss_family, SOCK_STREAM, IPPROTO_TCP); @@ -1255,7 +1254,7 @@ int setup_tcp_client(char *hostname, unsigned short port) } if (connect(sk, (struct sockaddr *)&saddr, sizeof(saddr)) < 0) { - pr_info("Can't connect to server %s:%u\n", ipstr, port); + pr_info("Can't connect to server %s:%u\n", ipstr, opts.port); close(sk); sk = -1; } else { From a37a02de8d3c67226c5d58ceb6c0ee9beb7b870d Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 24 Feb 2019 22:49:13 +0000 Subject: [PATCH 057/277] remote: Rename 'proxy_to_cache_fd' to 'remote_sk' The variable name 'remote_sk' is shorter than 'proxy_to_cache_fd' and it is more similar to 'page_server_sk' (used in criu/page-xfer.c). 
Signed-off-by: Radostin Stoyanov --- criu/img-cache.c | 18 +++++++++--------- criu/img-proxy.c | 12 ++++++------ criu/img-remote.c | 22 +++++++++++----------- criu/include/img-remote.h | 2 +- 4 files changed, 27 insertions(+), 27 deletions(-) diff --git a/criu/img-cache.c b/criu/img-cache.c index 4c1e6ebe7f..5603309b1b 100644 --- a/criu/img-cache.c +++ b/criu/img-cache.c @@ -18,31 +18,31 @@ int image_cache(bool background, char *local_cache_path) restoring = true; if (opts.ps_socket != -1) { - proxy_to_cache_fd = opts.ps_socket; - pr_info("Re-using ps socket %d\n", proxy_to_cache_fd); + remote_sk = opts.ps_socket; + pr_info("Re-using ps socket %d\n", remote_sk); } else { - proxy_to_cache_fd = setup_tcp_server("image cache"); - if (proxy_to_cache_fd < 0) { + remote_sk = setup_tcp_server("image cache"); + if (remote_sk < 0) { pr_perror("Unable to open proxy to cache TCP socket"); return -1; } // Wait to accept connection from proxy. - tmp = accept(proxy_to_cache_fd, NULL, 0); + tmp = accept(remote_sk, NULL, 0); if (tmp < 0) { pr_perror("Unable to accept remote image connection" " from image proxy"); - close(proxy_to_cache_fd); + close(remote_sk); return -1; } - proxy_to_cache_fd = tmp; + remote_sk = tmp; } - pr_info("Cache is connected to Proxy through fd %d\n", proxy_to_cache_fd); + pr_info("Cache is connected to Proxy through fd %d\n", remote_sk); local_req_fd = setup_UNIX_server_socket(local_cache_path); if (local_req_fd < 0) { pr_perror("Unable to open cache to proxy UNIX socket"); - close(proxy_to_cache_fd); + close(remote_sk); return -1; } diff --git a/criu/img-proxy.c b/criu/img-proxy.c index b11ab83e0c..d6b92148ee 100644 --- a/criu/img-proxy.c +++ b/criu/img-proxy.c @@ -21,18 +21,18 @@ int image_proxy(bool background, char *local_proxy_path) } if (opts.ps_socket != -1) { - proxy_to_cache_fd = opts.ps_socket; - pr_info("Re-using ps socket %d\n", proxy_to_cache_fd); + remote_sk = opts.ps_socket; + pr_info("Re-using ps socket %d\n", remote_sk); } else { - 
proxy_to_cache_fd = setup_tcp_client(); - if (proxy_to_cache_fd < 0) { + remote_sk = setup_tcp_client(); + if (remote_sk < 0) { pr_perror("Unable to open proxy to cache TCP socket"); close(local_req_fd); return -1; } } - pr_info("Proxy is connected to Cache through fd %d\n", proxy_to_cache_fd); + pr_info("Proxy is connected to Cache through fd %d\n", remote_sk); if (background) { if (daemon(1, 0) == -1) { @@ -41,7 +41,7 @@ int image_proxy(bool background, char *local_proxy_path) } } - // TODO - local_req_fd and proxy_to_cache_fd send as args. + // TODO - local_req_fd and remote_sk send as args. accept_image_connections(); pr_info("Finished image proxy."); return 0; diff --git a/criu/img-remote.c b/criu/img-remote.c index 13445b9833..a3813f4a83 100644 --- a/criu/img-remote.c +++ b/criu/img-remote.c @@ -62,7 +62,7 @@ static bool finished_local = false; static bool finished_remote = false; // Proxy to cache socket fd; Local dump or restore servicing fd. -int proxy_to_cache_fd; +int remote_sk; int local_req_fd; // Epoll fd and event array. @@ -361,7 +361,7 @@ static inline void rop_set_rimg(struct roperation *rop, struct rimage *rimg) rop->size = rimg->size; if (rop->flags == O_APPEND) { // Image forward on append must start where the last fwd finished. - if (rop->fd == proxy_to_cache_fd) { + if (rop->fd == remote_sk) { rop->curr_sent_buf = rimg->curr_fwd_buf; rop->curr_sent_bytes = rimg->curr_fwd_bytes; } else { @@ -653,7 +653,7 @@ static void handle_local_accept(int fd) static inline void finish_proxy_read(struct roperation *rop) { // If finished forwarding image - if (rop->fd == proxy_to_cache_fd) { + if (rop->fd == remote_sk) { // Update fwd buffer and byte count on rimg. rop->rimg->curr_fwd_buf = rop->curr_sent_buf; rop->rimg->curr_fwd_bytes = rop->curr_sent_bytes; @@ -676,7 +676,7 @@ static inline void finish_proxy_write(struct roperation *rop) } else { // Normal image received, forward it. 
struct roperation *rop_to_forward = new_remote_operation( - rop->path, rop->snapshot_id, proxy_to_cache_fd, rop->flags, false); + rop->path, rop->snapshot_id, remote_sk, rop->flags, false); // Add image to list of images. list_add_tail(&(rop->rimg->l), &rimg_head); @@ -695,7 +695,7 @@ static void finish_cache_write(struct roperation *rop) &rop_pending, rop->snapshot_id, rop->path); forwarding = false; - event_set(epoll_fd, EPOLL_CTL_ADD, proxy_to_cache_fd, EPOLLIN, &proxy_to_cache_fd); + event_set(epoll_fd, EPOLL_CTL_ADD, remote_sk, EPOLLIN, &remote_sk); // Add image to list of images. list_add_tail(&(rop->rimg->l), &rimg_head); @@ -816,8 +816,8 @@ void accept_image_connections() { // Only if we are restoring (cache-side) we need to add the remote sock to // the epoll. if (restoring) { - ret = event_set(epoll_fd, EPOLL_CTL_ADD, proxy_to_cache_fd, - EPOLLIN, &proxy_to_cache_fd); + ret = event_set(epoll_fd, EPOLL_CTL_ADD, remote_sk, + EPOLLIN, &remote_sk); if (ret) { pr_perror("Failed to add proxy to cache fd to epoll"); goto end; @@ -845,9 +845,9 @@ void accept_image_connections() { goto end; } handle_local_accept(local_req_fd); - } else if (restoring && !forwarding && events[i].data.ptr == &proxy_to_cache_fd) { - event_set(epoll_fd, EPOLL_CTL_DEL, proxy_to_cache_fd, 0, 0); - handle_remote_accept(proxy_to_cache_fd); + } else if (restoring && !forwarding && events[i].data.ptr == &remote_sk) { + event_set(epoll_fd, EPOLL_CTL_DEL, remote_sk, 0, 0); + handle_remote_accept(remote_sk); } else { struct roperation *rop = (struct roperation*)events[i].data.ptr; @@ -866,7 +866,7 @@ void accept_image_connections() { finished_local && !finished_remote && list_empty(&rop_forwarding)) { - close(proxy_to_cache_fd); + close(remote_sk); finished_remote = true; } diff --git a/criu/include/img-remote.h b/criu/include/img-remote.h index e853d3552b..a0cb22791a 100644 --- a/criu/include/img-remote.h +++ b/criu/include/img-remote.h @@ -67,7 +67,7 @@ struct roperation { }; /* This is the 
proxy to cache TCP socket FD. */ -extern int proxy_to_cache_fd; +extern int remote_sk; /* This the unix socket used to fulfill local requests. */ extern int local_req_fd; /* True if we are running the cache/restore, false if proxy/dump. */ From c64f65702684ef30d7c1c5e09319b72d00eb8d1a Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 24 Feb 2019 22:51:15 +0000 Subject: [PATCH 058/277] remote: Rename 'local_req_fd' to 'local_sk' The name 'local_sk' is shorter than 'local_req_fd', and it is more similar to the name 'page_server_sk' used in criu/page-xfer.c Signed-off-by: Radostin Stoyanov --- criu/img-cache.c | 4 ++-- criu/img-proxy.c | 8 ++++---- criu/img-remote.c | 12 ++++++------ criu/include/img-remote.h | 2 +- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/criu/img-cache.c b/criu/img-cache.c index 5603309b1b..f28ba1c3cf 100644 --- a/criu/img-cache.c +++ b/criu/img-cache.c @@ -39,8 +39,8 @@ int image_cache(bool background, char *local_cache_path) pr_info("Cache is connected to Proxy through fd %d\n", remote_sk); - local_req_fd = setup_UNIX_server_socket(local_cache_path); - if (local_req_fd < 0) { + local_sk = setup_UNIX_server_socket(local_cache_path); + if (local_sk < 0) { pr_perror("Unable to open cache to proxy UNIX socket"); close(remote_sk); return -1; diff --git a/criu/img-proxy.c b/criu/img-proxy.c index d6b92148ee..09fa6dc04f 100644 --- a/criu/img-proxy.c +++ b/criu/img-proxy.c @@ -14,8 +14,8 @@ int image_proxy(bool background, char *local_proxy_path) local_proxy_path, opts.addr, opts.port); restoring = false; - local_req_fd = setup_UNIX_server_socket(local_proxy_path); - if (local_req_fd < 0) { + local_sk = setup_UNIX_server_socket(local_proxy_path); + if (local_sk < 0) { pr_perror("Unable to open CRIU to proxy UNIX socket"); return -1; } @@ -27,7 +27,7 @@ int image_proxy(bool background, char *local_proxy_path) remote_sk = setup_tcp_client(); if (remote_sk < 0) { pr_perror("Unable to open proxy to cache TCP socket"); - 
close(local_req_fd); + close(local_sk); return -1; } } @@ -41,7 +41,7 @@ int image_proxy(bool background, char *local_proxy_path) } } - // TODO - local_req_fd and remote_sk send as args. + // TODO - local_sk and remote_sk send as args. accept_image_connections(); pr_info("Finished image proxy."); return 0; diff --git a/criu/img-remote.c b/criu/img-remote.c index a3813f4a83..38028ca553 100644 --- a/criu/img-remote.c +++ b/criu/img-remote.c @@ -63,7 +63,7 @@ static bool finished_remote = false; // Proxy to cache socket fd; Local dump or restore servicing fd. int remote_sk; -int local_req_fd; +int local_sk; // Epoll fd and event array. static int epoll_fd; @@ -476,7 +476,7 @@ static inline void finish_local() { int ret; finished_local = true; - ret = event_set(epoll_fd, EPOLL_CTL_DEL, local_req_fd, 0, 0); + ret = event_set(epoll_fd, EPOLL_CTL_DEL, local_sk, 0, 0); if (ret) { pr_perror("Failed to del local fd from epoll"); } @@ -807,7 +807,7 @@ void accept_image_connections() { goto end; } - ret = event_set(epoll_fd, EPOLL_CTL_ADD, local_req_fd, EPOLLIN, &local_req_fd); + ret = event_set(epoll_fd, EPOLL_CTL_ADD, local_sk, EPOLLIN, &local_sk); if (ret) { pr_perror("Failed to add local fd to epoll"); goto end; @@ -837,14 +837,14 @@ void accept_image_connections() { for (i = 0; i < n_events; i++) { // Accept from local dump/restore? 
- if (events[i].data.ptr == &local_req_fd) { + if (events[i].data.ptr == &local_sk) { if (events[i].events & EPOLLHUP || events[i].events & EPOLLERR) { if (!finished_local) pr_perror("Unable to accept more local image connections"); goto end; } - handle_local_accept(local_req_fd); + handle_local_accept(local_sk); } else if (restoring && !forwarding && events[i].data.ptr == &remote_sk) { event_set(epoll_fd, EPOLL_CTL_DEL, remote_sk, 0, 0); handle_remote_accept(remote_sk); @@ -878,7 +878,7 @@ void accept_image_connections() { } end: close(epoll_fd); - close(local_req_fd); + close(local_sk); free(events); } diff --git a/criu/include/img-remote.h b/criu/include/img-remote.h index a0cb22791a..087c395bf7 100644 --- a/criu/include/img-remote.h +++ b/criu/include/img-remote.h @@ -69,7 +69,7 @@ struct roperation { /* This is the proxy to cache TCP socket FD. */ extern int remote_sk; /* This the unix socket used to fulfill local requests. */ -extern int local_req_fd; +extern int local_sk; /* True if we are running the cache/restore, false if proxy/dump. */ extern bool restoring; From 75dead184f610d92a6032258273630c533349f3e Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Fri, 22 Feb 2019 18:04:32 +0000 Subject: [PATCH 059/277] util: Fix addr casting for IPv4/IPv6 in autobind When saddr.ss_family is AF_INET6 we should cast &saddr to (struct sockaddr_in6 *). 
Signed-off-by: Radostin Stoyanov --- criu/util.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/criu/util.c b/criu/util.c index b08d08d1c2..93dfc95ed6 100644 --- a/criu/util.c +++ b/criu/util.c @@ -1143,9 +1143,9 @@ int setup_tcp_server(char *type) } if (saddr.ss_family == AF_INET6) { - opts.port = ntohs(((struct sockaddr_in *)&saddr)->sin_port); - } else if (saddr.ss_family == AF_INET) { opts.port = ntohs(((struct sockaddr_in6 *)&saddr)->sin6_port); + } else if (saddr.ss_family == AF_INET) { + opts.port = ntohs(((struct sockaddr_in *)&saddr)->sin_port); } pr_info("Using %u port\n", opts.port); From 2f665d8cc34a2a398da0c07176fd02975c95ccd8 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 24 Feb 2019 23:00:26 +0000 Subject: [PATCH 060/277] util: Introduce fd_set_nonblocking() Combine the functionality of socket_set_non_blocking() and socket_set_blocking() into a new function, and move it in criu/util.c to enable reusability throughout the code base. Signed-off-by: Radostin Stoyanov --- criu/img-remote.c | 38 +++++--------------------------------- 1 file changed, 5 insertions(+), 33 deletions(-) diff --git a/criu/img-remote.c b/criu/img-remote.c index 38028ca553..d82863600a 100644 --- a/criu/img-remote.c +++ b/criu/img-remote.c @@ -137,34 +137,6 @@ static int event_set(int epoll_fd, int op, int fd, uint32_t events, void *data) return ret; } -static inline void socket_set_non_blocking(int fd) -{ - int flags = fcntl(fd, F_GETFL, NULL); - - if (flags < 0) { - pr_perror("Failed to obtain flags from fd %d", fd); - return; - } - flags |= O_NONBLOCK; - - if (fcntl(fd, F_SETFL, flags) < 0) - pr_perror("Failed to set flags for fd %d", fd); -} - -static inline void socket_set_blocking(int fd) -{ - int flags = fcntl(fd, F_GETFL, NULL); - - if (flags < 0) { - pr_perror("Failed to obtain flags from fd %d", fd); - return; - } - flags &= (~O_NONBLOCK); - - if (fcntl(fd, F_SETFL, flags) < 0) - pr_perror("Failed to set flags for fd %d", fd); -} - int 
setup_UNIX_server_socket(char *path) { struct sockaddr_un addr; @@ -529,7 +501,7 @@ static void forward_remote_image(struct roperation *rop) int64_t ret = 0; // Set blocking during the setup. - socket_set_blocking(rop->fd); + fd_set_nonblocking(rop->fd, false); ret = write_remote_header( rop->fd, rop->snapshot_id, rop->path, rop->flags, rop->size); @@ -545,7 +517,7 @@ static void forward_remote_image(struct roperation *rop) rop->size); // Go back to non-blocking - socket_set_non_blocking(rop->fd); + fd_set_nonblocking(rop->fd, true); forwarding = true; event_set(epoll_fd, EPOLL_CTL_ADD, rop->fd, EPOLLOUT, rop); @@ -561,7 +533,7 @@ static void handle_remote_accept(int fd) struct roperation* rop = NULL; // Set blocking during the setup. - socket_set_blocking(fd); + fd_set_nonblocking(fd, false); ret = read_remote_header(fd, snapshot_id, path, &flags, &size); if (ret < 0) { @@ -576,7 +548,7 @@ static void handle_remote_accept(int fd) } // Go back to non-blocking - socket_set_non_blocking(fd); + fd_set_nonblocking(fd, true); pr_info("[fd=%d] Received %s request for %s:%s with %" PRIu64 " bytes\n", fd, strflags(flags), path, snapshot_id, size); @@ -642,7 +614,7 @@ static void handle_local_accept(int fd) } else { list_add_tail(&(rop->l), &rop_pending); } - socket_set_non_blocking(rop->fd); + fd_set_nonblocking(rop->fd, false); } return; From 047b127a1dd4e6d3bd346c31bab019c811dd401d Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Wed, 27 Feb 2019 21:32:50 +0000 Subject: [PATCH 061/277] remote: Use \0 as indicator for a "finish" msg Combine the macro constants DUMP_FINISH and RESTORE_FINISH, into a single one, called FINISH. In addition, replace the key-word strings used by the above-mentioned constants, and NULL_SNAPSHOT_ID, with a \0 character that will be used to indicate a "finish" message. 
Signed-off-by: Radostin Stoyanov --- criu/img-remote.c | 47 +++++++++++++++++---------------------- criu/include/img-remote.h | 5 ++--- 2 files changed, 23 insertions(+), 29 deletions(-) diff --git a/criu/img-remote.c b/criu/img-remote.c index d82863600a..e2e3ed0193 100644 --- a/criu/img-remote.c +++ b/criu/img-remote.c @@ -460,13 +460,6 @@ static struct roperation *handle_accept_cache_read(int cli_fd, struct rimage *rimg = NULL; struct roperation *rop = NULL; - // Check if this is the restore finish message. - if (!strncmp(path, RESTORE_FINISH, sizeof(RESTORE_FINISH))) { - close(cli_fd); - finish_local(); - return NULL; - } - rop = new_remote_operation(path, snapshot_id, cli_fd, flags, true); if (rop == NULL) { pr_perror("Error preparing remote operation"); @@ -587,6 +580,12 @@ static void handle_local_accept(int fd) goto err; } + if (snapshot_id[0] == NULL_SNAPSHOT_ID && path[0] == FINISH) { + close(cli_fd); + finish_local(); + return; + } + pr_info("[fd=%d] Received %s request for %s:%s\n", cli_fd, strflags(flags), path, snapshot_id); @@ -641,24 +640,18 @@ static inline void finish_proxy_read(struct roperation *rop) static inline void finish_proxy_write(struct roperation *rop) { - // No more local images are comming. Close local socket. - if (!strncmp(rop->path, DUMP_FINISH, sizeof(DUMP_FINISH))) { - // TODO - couldn't we handle the DUMP_FINISH in inside handle_accept_proxy_write? - finish_local(); - } else { - // Normal image received, forward it. - struct roperation *rop_to_forward = new_remote_operation( - rop->path, rop->snapshot_id, remote_sk, rop->flags, false); + // Normal image received, forward it. + struct roperation *rop_to_forward = new_remote_operation( + rop->path, rop->snapshot_id, remote_sk, rop->flags, false); - // Add image to list of images. - list_add_tail(&(rop->rimg->l), &rimg_head); + // Add image to list of images. 
+ list_add_tail(&(rop->rimg->l), &rimg_head); - rop_set_rimg(rop_to_forward, rop->rimg); - if (list_empty(&rop_forwarding)) { - forward_remote_image(rop_to_forward); - } - list_add_tail(&(rop_to_forward->l), &rop_forwarding); + rop_set_rimg(rop_to_forward, rop->rimg); + if (list_empty(&rop_forwarding)) { + forward_remote_image(rop_to_forward); } + list_add_tail(&(rop_to_forward->l), &rop_forwarding); } static void finish_cache_write(struct roperation *rop) @@ -967,9 +960,11 @@ int read_remote_image_connection(char *snapshot_id, char *path) path, snapshot_id); return -1; } - if (!error || !strncmp(path, RESTORE_FINISH, sizeof(RESTORE_FINISH))) { + + if (!error || (snapshot_id[0] == NULL_SNAPSHOT_ID && path[0] != FINISH)) return sockfd; - } else if (error == ENOENT) { + + if (error == ENOENT) { pr_info("Image does not exist (%s:%s)\n", path, snapshot_id); close(sockfd); return -ENOENT; @@ -997,7 +992,7 @@ int write_remote_image_connection(char *snapshot_id, char *path, int flags) int finish_remote_dump(void) { pr_info("Dump side is calling finish\n"); - int fd = write_remote_image_connection(NULL_SNAPSHOT_ID, DUMP_FINISH, O_WRONLY); + int fd = write_remote_image_connection(NULL_SNAPSHOT_ID, FINISH, O_WRONLY); if (fd == -1) { pr_err("Unable to open finish dump connection"); @@ -1011,7 +1006,7 @@ int finish_remote_dump(void) int finish_remote_restore(void) { pr_info("Restore side is calling finish\n"); - int fd = read_remote_image_connection(NULL_SNAPSHOT_ID, RESTORE_FINISH); + int fd = read_remote_image_connection(NULL_SNAPSHOT_ID, FINISH); if (fd == -1) { pr_err("Unable to open finish restore connection\n"); diff --git a/criu/include/img-remote.h b/criu/include/img-remote.h index 087c395bf7..66d75b90ff 100644 --- a/criu/include/img-remote.h +++ b/criu/include/img-remote.h @@ -9,10 +9,9 @@ #ifndef IMAGE_REMOTE_H #define IMAGE_REMOTE_H -#define DUMP_FINISH "DUMP_FINISH" -#define RESTORE_FINISH "RESTORE_FINISH" +#define FINISH 0 #define PARENT_IMG "parent" -#define 
NULL_SNAPSHOT_ID "null" +#define NULL_SNAPSHOT_ID 0 #define DEFAULT_CACHE_SOCKET "img-cache.sock" #define DEFAULT_PROXY_SOCKET "img-proxy.sock" From 861510f7b6f9517a1fa4f299a0709a19d98a9069 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Wed, 13 Feb 2019 21:11:43 +0000 Subject: [PATCH 062/277] Sort includes in criu/img-*.c Sort and remove unused/unnecessary include statements in criu/img-*.c Signed-off-by: Radostin Stoyanov --- criu/img-cache.c | 8 ++------ criu/img-proxy.c | 7 ++----- criu/img-remote.c | 28 ++++++++-------------------- 3 files changed, 12 insertions(+), 31 deletions(-) diff --git a/criu/img-cache.c b/criu/img-cache.c index f28ba1c3cf..3887b500d2 100644 --- a/criu/img-cache.c +++ b/criu/img-cache.c @@ -1,12 +1,8 @@ +#include #include -#include "img-remote.h" -#include "criu-log.h" -#include -#include -#include -#include #include "cr_options.h" +#include "img-remote.h" #include "util.h" int image_cache(bool background, char *local_cache_path) diff --git a/criu/img-proxy.c b/criu/img-proxy.c index 09fa6dc04f..f15bd7c9a0 100644 --- a/criu/img-proxy.c +++ b/criu/img-proxy.c @@ -1,11 +1,8 @@ #include -#include "img-remote.h" -#include "criu-log.h" -#include -#include -#include #include "cr_options.h" +#include "criu-log.h" +#include "img-remote.h" #include "util.h" int image_proxy(bool background, char *local_proxy_path) diff --git a/criu/img-remote.c b/criu/img-remote.c index e2e3ed0193..433c012ab7 100644 --- a/criu/img-remote.c +++ b/criu/img-remote.c @@ -1,28 +1,16 @@ -#include -#include -#include -#include +#include #include #include -#include -#include -#include "xmalloc.h" -#include "criu-log.h" +#include +#include + +#include "cr_options.h" #include "img-remote.h" +#include "image.h" #include "images/remote-image.pb-c.h" -#include "protobuf-desc.h" -#include -#include "servicefd.h" -#include "common/compiler.h" -#include "cr_options.h" - -#include -#include "sys/un.h" -#include -#include - #include "protobuf.h" -#include "image.h" 
+#include "servicefd.h" +#include "xmalloc.h" #define EPOLL_MAX_EVENTS 50 From 48ff3f1eb0288e5715862277ee442e15c2cf4aee Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 24 Mar 2019 14:26:10 +0000 Subject: [PATCH 063/277] util: Remove deprecated print_data() routine The print_data() function was part of the deprecated (and removed) 'show' action, and it was moved in util.c with the following commit: a501b4804b3c95e1d83d64dd10ed95c37f0378bb The 'show' action has been deprecated since 1.6, let's finally drop it. The print_data() routine is kept for yet another (to be deprecated too) feature called 'criu exec'. The criu exec feature was removed with: 909590a3558560655c1ce5b72215efbb325999ca Remove criu exec code It's now obsoleted by compel library. Maybe-TODO: Add compel tool exec action? Therefore, now we can drop print_data() as well. Signed-off-by: Radostin Stoyanov --- criu/include/util.h | 2 -- criu/util.c | 73 --------------------------------------------- 2 files changed, 75 deletions(-) diff --git a/criu/include/util.h b/criu/include/util.h index fe6ee4427a..a14be72293 100644 --- a/criu/include/util.h +++ b/criu/include/util.h @@ -290,8 +290,6 @@ char *xstrcat(char *str, const char *fmt, ...) char *xsprintf(const char *fmt, ...) 
__attribute__ ((__format__ (__printf__, 1, 2))); -void print_data(unsigned long addr, unsigned char *data, size_t size); - int setup_tcp_server(char *type); int run_tcp_server(bool daemon_mode, int *ask, int cfd, int sk); int setup_tcp_client(void); diff --git a/criu/util.c b/criu/util.c index 93dfc95ed6..f02486a15f 100644 --- a/criu/util.c +++ b/criu/util.c @@ -26,7 +26,6 @@ #include #include #include -#include #include "kerndat.h" #include "page.h" @@ -1001,78 +1000,6 @@ void tcp_nodelay(int sk, bool on) pr_perror("Unable to restore TCP_NODELAY (%d)", val); } -static inline void pr_xsym(unsigned char *data, size_t len, int pos) -{ - char sym; - - if (pos < len) - sym = data[pos]; - else - sym = ' '; - - pr_msg("%c", isprint(sym) ? sym : '.'); -} - -static inline void pr_xdigi(unsigned char *data, size_t len, int pos) -{ - if (pos < len) - pr_msg("%02x ", data[pos]); - else - pr_msg(" "); -} - -static int nice_width_for(unsigned long addr) -{ - int ret = 3; - - while (addr) { - addr >>= 4; - ret++; - } - - return ret; -} - -void print_data(unsigned long addr, unsigned char *data, size_t size) -{ - int i, j, addr_len; - unsigned zero_line = 0; - - addr_len = nice_width_for(addr + size); - - for (i = 0; i < size; i += 16) { - if (*(u64 *)(data + i) == 0 && *(u64 *)(data + i + 8) == 0) { - if (zero_line == 0) - zero_line = 1; - else { - if (zero_line == 1) { - pr_msg("*\n"); - zero_line = 2; - } - - continue; - } - } else - zero_line = 0; - - pr_msg("%#0*lx: ", addr_len, addr + i); - for (j = 0; j < 8; j++) - pr_xdigi(data, size, i + j); - pr_msg(" "); - for (j = 8; j < 16; j++) - pr_xdigi(data, size, i + j); - - pr_msg(" |"); - for (j = 0; j < 8; j++) - pr_xsym(data, size, i + j); - pr_msg(" "); - for (j = 8; j < 16; j++) - pr_xsym(data, size, i + j); - - pr_msg("|\n"); - } -} - static int get_sockaddr_in(struct sockaddr_storage *addr, char *host) { memset(addr, 0, sizeof(*addr)); From 83cb0c01beec52b5dd99f8332920f9e9a0882527 Mon Sep 17 00:00:00 2001 From: Radostin 
Stoyanov Date: Sun, 7 Apr 2019 20:41:37 +0100 Subject: [PATCH 064/277] criu-ns: Convert to python3 style print() syntax Signed-off-by: Radostin Stoyanov --- scripts/criu-ns | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/criu-ns b/scripts/criu-ns index 0910f2a336..b6b6a1111b 100755 --- a/scripts/criu-ns +++ b/scripts/criu-ns @@ -57,7 +57,7 @@ else: def run_criu(): - print sys.argv + print(sys.argv) os.execlp('criu', *['criu'] + sys.argv[1:]) @@ -155,7 +155,7 @@ def set_pidns(tpid, pid_idx): if ls[1] != tpid: raise OSError(errno.ESRCH, 'No such pid') - print 'Replace pid %s with %s' % (tpid, ls[2]) + print('Replace pid {} with {}'.format(tpid, ls[2])) sys.argv[pid_idx] = ls[2] break else: @@ -234,7 +234,7 @@ if action == 'restore': elif action == 'dump' or action == 'pre-dump': res = wrap_dump() else: - print 'Unsupported action %s for nswrap' % action + print('Unsupported action {} for nswrap'.format(action)) res = -1 sys.exit(res) From 3846d16d69b73e9f56f4a7cb1675880914e9f665 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 7 Apr 2019 20:43:12 +0100 Subject: [PATCH 065/277] criu-ns: Print usage info when no args provided Signed-off-by: Radostin Stoyanov --- scripts/criu-ns | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/scripts/criu-ns b/scripts/criu-ns index b6b6a1111b..b582f75800 100755 --- a/scripts/criu-ns +++ b/scripts/criu-ns @@ -227,6 +227,18 @@ def wrap_dump(): return status +if len(sys.argv) == 1: + print(""" +Usage: + {0} dump|pre-dump -t PID [] + {0} restore [] +\nCommands: + dump checkpoint a process/tree identified by pid + pre-dump pre-dump task(s) minimizing their frozen time + restore restore a process/tree +""".format(sys.argv[0])) + exit(1) + action = sys.argv[1] if action == 'restore': From 50b1a4958c00e6ea3c1bb3c12f0da9e582bab345 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 7 Apr 2019 20:55:32 +0100 Subject: [PATCH 066/277] criu-ns: Convert c_char_p strings to bytes 
object class ctypes.c_char_p Represents the C char * datatype when it points to a zero- terminated string. For a general character pointer that may also point to binary data, POINTER(c_char) must be used. The constructor accepts an integer address, or a bytes object. https://docs.python.org/3/library/ctypes.html#ctypes.c_char_p Signed-off-by: Radostin Stoyanov --- scripts/criu-ns | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/criu-ns b/scripts/criu-ns index b582f75800..e065c59716 100755 --- a/scripts/criu-ns +++ b/scripts/criu-ns @@ -74,11 +74,11 @@ def wrap_restore(): os.close(r_pipe) # Mount new /proc - if _mount(None, "/", None, MS_SLAVE|MS_REC, None) != 0: + if _mount(None, b"/", None, MS_SLAVE|MS_REC, None) != 0: _errno = ctypes.get_errno() raise OSError(_errno, errno.errorcode[_errno]) - if _mount('proc', '/proc', 'proc', 0, None) != 0: + if _mount(b'proc', b'/proc', b'proc', 0, None) != 0: _errno = ctypes.get_errno() raise OSError(_errno, errno.errorcode[_errno]) @@ -98,7 +98,7 @@ def wrap_restore(): status = -251 break - os.write(w_pipe, "%d" % status) + os.write(w_pipe, b"%d" % status) os.close(w_pipe) if status != 0: From 38a9b23f068e2c06f9d88995e5f8db654f418ce0 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Fri, 12 Apr 2019 21:01:36 +0100 Subject: [PATCH 067/277] Convert spaces to tabs There are a few places where spaces have been used instead of tabs for indentation. This patch converts the spaces to tabs for consistency with the rest of the code base. 
Signed-off-by: Radostin Stoyanov --- criu/include/protobuf-desc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/criu/include/protobuf-desc.h b/criu/include/protobuf-desc.h index 696a5800b2..21ba27193f 100644 --- a/criu/include/protobuf-desc.h +++ b/criu/include/protobuf-desc.h @@ -61,10 +61,10 @@ enum { PB_AUTOFS, PB_GHOST_CHUNK, PB_FILE, - PB_REMOTE_IMAGE, /* Header for images sent from proxy to cache.*/ - PB_LOCAL_IMAGE, /* Header for reading/writing images from/to proxy or cache. */ + PB_REMOTE_IMAGE, /* Header for images sent from proxy to cache.*/ + PB_LOCAL_IMAGE, /* Header for reading/writing images from/to proxy or cache. */ PB_LOCAL_IMAGE_REPLY, /* Header for reading/writing images reply. */ - PB_SNAPSHOT_ID, /* Contains a single id. Used for reading/writing ids from proxy or cache. */ + PB_SNAPSHOT_ID, /* Contains a single id. Used for reading/writing ids from proxy or cache. */ /* PB_AUTOGEN_STOP */ From 13d2868bb37700c110ddf8b6d550a996c36fee8c Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 15 May 2019 09:47:17 +0000 Subject: [PATCH 068/277] net: fix coverity RESOURCE_LEAK criu-3.12/criu/net.c:2043: overwrite_var: Overwriting "img" in "img = open_image_at(-1, CR_FD_IP6TABLES, 0UL, pid)" leaks the storage that "img" points to. 
Signed-off-by: Adrian Reber --- criu/net.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/criu/net.c b/criu/net.c index b9f6669c3c..fe9b51addc 100644 --- a/criu/net.c +++ b/criu/net.c @@ -1941,13 +1941,13 @@ static int restore_ip_dump(int type, int pid, char *cmd) if (written < n) { pr_perror("Failed to write to tmpfile " "[written: %d; total: %d]", written, n); - return -1; + goto close; } } if (fseek(tmp_file, 0, SEEK_SET)) { pr_perror("Failed to set file position to beginning of tmpfile"); - return -1; + goto close; } if (img) { @@ -1955,6 +1955,7 @@ static int restore_ip_dump(int type, int pid, char *cmd) close_image(img); } +close: if(fclose(tmp_file)) { pr_perror("Failed to close tmpfile"); } @@ -2059,6 +2060,7 @@ static inline int restore_iptables(int pid) return -1; if (empty_image(img)) { ret = 0; + close_image(img); goto ipt6; } From fe9ad6288e0c95976206372769b1c1539cbaed35 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Mon, 1 Jul 2019 17:40:44 +0300 Subject: [PATCH 069/277] py: Manual fixlets of code formatting Signed-off-by: Pavel Emelyanov --- coredump/criu_coredump/coredump.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/coredump/criu_coredump/coredump.py b/coredump/criu_coredump/coredump.py index bc53a77051..68dc16bf2f 100644 --- a/coredump/criu_coredump/coredump.py +++ b/coredump/criu_coredump/coredump.py @@ -645,8 +645,7 @@ def _get_page(self, pid, page_no): ppid = self.pstree[pid]["ppid"] return self._get_page(ppid, page_no) else: - with open(self._imgs_dir + "/" + "pages-" + str(pages_id) + - ".img") as f: + with open(self._imgs_dir + "/pages-%s.img" % pages_id) as f: f.seek(off * PAGESIZE) return f.read(PAGESIZE) From a981ddfd1e5d63f5d3c71e9e86147dbf980f97c8 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 1 Sep 2019 12:23:39 +0100 Subject: [PATCH 070/277] files-reg: Drop clear_ghost_files() prototype The function clear_ghost_files() has been removed in commit b11eeea "restore: 
auto-unlink for ghost files (v2)". Signed-off-by: Radostin Stoyanov --- criu/include/files-reg.h | 1 - 1 file changed, 1 deletion(-) diff --git a/criu/include/files-reg.h b/criu/include/files-reg.h index 7a22d4d829..016d76a9fc 100644 --- a/criu/include/files-reg.h +++ b/criu/include/files-reg.h @@ -30,7 +30,6 @@ extern int open_reg_by_id(u32 id); extern int open_reg_fd(struct file_desc *); extern int open_path(struct file_desc *, int (*open_cb)(int ns_root_fd, struct reg_file_info *, void *), void *arg); -extern void clear_ghost_files(void); extern const struct fdtype_ops regfile_dump_ops; extern int do_open_reg_noseek_flags(int ns_root_fd, struct reg_file_info *rfi, void *arg); From cf7e7fc07298b33192003e347363bd85a91f4601 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Tue, 13 Aug 2019 22:11:04 +0100 Subject: [PATCH 071/277] cr-dump: Remove redundant if-statement Signed-off-by: Radostin Stoyanov --- criu/cr-dump.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/criu/cr-dump.c b/criu/cr-dump.c index e070b8b254..119c82d39f 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -783,8 +783,6 @@ static int dump_task_core_all(struct parasite_ctl *ctl, img = img_from_set(cr_imgset, CR_FD_CORE); ret = pb_write_one(img, core, PB_CORE); - if (ret < 0) - goto err; err: pr_info("----------------------------------------\n"); From 024bb21a5d197b1392714756988e38782b8b798e Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 10 Sep 2019 06:50:58 -0700 Subject: [PATCH 072/277] arch/x86: push correct eip on the stack before lretq Right now we use pushq, but it pushes sign-extended value, so if the parasite code is placed higher than 2Gb, we will see something like this: 0xf7efd5b0: pushq $0x23 0xf7efd5b2: pushq $0xfffffffff7efd5b9 => 0xf7efd5b7: lretq Actually we want to push 0xf7efd5b9 instead of 0xfffffffff7efd5b9.
Fixes: #398 Cc: Dmitry Safonov Cc: Cyrill Gorcunov Signed-off-by: Andrei Vagin Acked-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: Andrei Vagin --- compel/arch/x86/src/lib/include/uapi/asm/sigframe.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/compel/arch/x86/src/lib/include/uapi/asm/sigframe.h b/compel/arch/x86/src/lib/include/uapi/asm/sigframe.h index 51ca023f77..486c0c8e03 100644 --- a/compel/arch/x86/src/lib/include/uapi/asm/sigframe.h +++ b/compel/arch/x86/src/lib/include/uapi/asm/sigframe.h @@ -194,7 +194,9 @@ void rt_sigframe_erase_sigset(struct rt_sigframe *sigframe) #define ARCH_RT_SIGRETURN_COMPAT(new_sp) \ asm volatile( \ "pushq $"__stringify(USER32_CS)" \n" \ - "pushq $1f \n" \ + "xor %%rax, %%rax \n" \ + "movl $1f, %%eax \n" \ + "pushq %%rax \n" \ "lretq \n" \ "1: \n" \ ".code32 \n" \ From 266db50cf700492976b79abfd6741ec555b8cca0 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Wed, 11 Sep 2019 11:13:51 +0100 Subject: [PATCH 073/277] compel/x86: Don't use pushq for a label `pushq` sign-extends the value. Which is a bummer as the label's address may be higher than 2Gb, which means that the sign-bit will be set. As it long-jumps with ia32 selector, %r11 can be scratched. Use %r11 register as a temporary to push the 32-bit address.
Complements: a9a760278c1a ("arch/x86: push correct eip on the stack before lretq") Cc: Cyrill Gorcunov Reported-by: Andrei Vagin Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- compel/arch/x86/plugins/std/parasite-head.S | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/compel/arch/x86/plugins/std/parasite-head.S b/compel/arch/x86/plugins/std/parasite-head.S index a988de9d42..465cd887b1 100644 --- a/compel/arch/x86/plugins/std/parasite-head.S +++ b/compel/arch/x86/plugins/std/parasite-head.S @@ -25,7 +25,9 @@ ENTRY(__export_parasite_head_start_compat) .code64 PARASITE_ENTRY 0 pushq $__USER32_CS - pushq $2f + xor %r11, %r11 + movl $2f, %r11d + pushq %r11 lretq 2: .code32 From bdee0a76a74f632ae5a6fcb88bcb95a8b49cabc1 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Mon, 9 Sep 2019 21:57:33 +0100 Subject: [PATCH 074/277] scripts: Drop Fedora 28/rawhide fix This change was introduced with c75cb2b and it is no longer necessary. Signed-off-by: Radostin Stoyanov --- scripts/build/Dockerfile.fedora.tmpl | 6 ------ 1 file changed, 6 deletions(-) diff --git a/scripts/build/Dockerfile.fedora.tmpl b/scripts/build/Dockerfile.fedora.tmpl index 280ce1cdd8..b1127c9b23 100644 --- a/scripts/build/Dockerfile.fedora.tmpl +++ b/scripts/build/Dockerfile.fedora.tmpl @@ -30,12 +30,6 @@ RUN dnf install -y \ rubygem-asciidoctor \ kmod -# Replace coreutils-single with "traditional" coreutils -# to fix the following error on Fedora 28/rawhide while -# running under QEMU: -# > sh: /usr/bin/sort: /usr/bin/coreutils: bad interpreter: No such file or directory -RUN dnf install -y --allowerasing coreutils - RUN ln -sf python3 /usr/bin/python ENV PYTHON=python3 From f3caa7f16b270869be545fc6bf6732beb7a2c4fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20C=C5=82api=C5=84ski?= Date: Thu, 8 Aug 2019 18:49:13 +0200 Subject: [PATCH 075/277] Add new command line option: --cgroup-yard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Instead of creating cgroup yard in CRIU, now we can create it externally and pass it to CRIU. Useful if somebody doesn't want to grant CAP_SYS_ADMIN to CRIU. Signed-off-by: Michał Cłapiński --- Documentation/criu.txt | 36 ++++++++++++-- criu/cgroup.c | 99 +++++++++++++++++++++++++-------------- criu/config.c | 4 ++ criu/cr-service.c | 3 ++ criu/crtools.c | 4 ++ criu/image.c | 2 +- criu/include/cr_options.h | 1 + images/rpc.proto | 1 + lib/c/criu.c | 13 +++++ lib/c/criu.h | 1 + 10 files changed, 125 insertions(+), 39 deletions(-) diff --git a/Documentation/criu.txt b/Documentation/criu.txt index 94fc5428a3..28913a7fb8 100644 --- a/Documentation/criu.txt +++ b/Documentation/criu.txt @@ -266,10 +266,33 @@ For example, the command line for the above example should look like this: discovered automatically (usually via */proc*). This option is useful when one needs *criu* to skip some controllers. -*--cgroup-props-ignore-default*:: - When combined with *--cgroup-props*, makes *criu* substitute - a predefined controller property with the new one shipped. If the option - is not used, the predefined properties are merged with the provided ones. +*--cgroup-yard* 'path':: + Instead of trying to mount cgroups in CRIU, provide a path to a directory + with already created cgroup yard. Useful if you don't want to grant + CAP_SYS_ADMIN to CRIU. For every cgroup mount there should be exactly one + directory. If there is only one controller in this mount, the dir's name + should be just the name of the controller. If there are multiple controllers + comounted, the directory name should have them be separated by a comma.
++ +For example, if */proc/cgroups* looks like this: ++ +---------- +#subsys_name hierarchy num_cgroups enabled +cpu 1 1 1 +devices 2 2 1 +freezer 2 2 1 +---------- ++ +then you can create the cgroup yard by the following commands: ++ +---------- +mkdir private_yard +cd private_yard +mkdir cpu +mount -t cgroup -o cpu none cpu +mkdir devices,freezer +mount -t cgroup -o devices,freezer none devices,freezer +---------- *--tcp-established*:: Checkpoint established TCP connections. @@ -442,6 +465,11 @@ The 'mode' may be one of the following: *ignore*::: Don't deal with cgroups and pretend that they don't exist. +*--cgroup-yard* 'path':: + Instead of trying to mount cgroups in CRIU, provide a path to a directory + with already created cgroup yard. For more information look in the *dump* + section. + *--cgroup-root* ['controller'*:*]/'newroot':: Change the root cgroup the controller will be installed into. No controller means that root is the default for all controllers not specified. diff --git a/criu/cgroup.c b/criu/cgroup.c index 332c79fb9a..9f3aef10d1 100644 --- a/criu/cgroup.c +++ b/criu/cgroup.c @@ -549,8 +549,9 @@ static int collect_cgroups(struct list_head *ctls) int fd = -1; list_for_each_entry(cc, ctls, l) { - char path[PATH_MAX], mopts[1024], *root; + char path[PATH_MAX], *root; char prefix[] = ".criu.cgmounts.XXXXXX"; + const char namestr[] = "name="; struct cg_controller *cg; struct cg_root_opt *o; @@ -568,7 +569,7 @@ static int collect_cgroups(struct list_head *ctls) if (!current_controller) { /* only allow "fake" controllers to be created this way */ - if (!strstartswith(cc->name, "name=")) { + if (!strstartswith(cc->name, namestr)) { pr_err("controller %s not found\n", cc->name); return -1; } else { @@ -586,25 +587,44 @@ static int collect_cgroups(struct list_head *ctls) if (!opts.manage_cgroups) continue; - if (strstartswith(cc->name, "name=")) - snprintf(mopts, sizeof(mopts), "none,%s", cc->name); - else - snprintf(mopts, sizeof(mopts), "%s", cc->name); 
+ if (opts.cgroup_yard) { + char dir_path[PATH_MAX]; + int off; + + off = snprintf(dir_path, PATH_MAX, "%s/", opts.cgroup_yard); + if (strstartswith(cc->name, namestr)) + snprintf(dir_path + off, PATH_MAX, "%s", cc->name + strlen(namestr)); + else + snprintf(dir_path + off, PATH_MAX, "%s", cc->name); + + fd = open(dir_path, O_RDONLY | O_DIRECTORY, 0); + if (fd < 0) { + pr_perror("couldn't open %s", dir_path); + return -1; + } + } else { + char mopts[1024]; - if (mkdtemp(prefix) == NULL) { - pr_perror("can't make dir for cg mounts"); - return -1; - } + if (strstartswith(cc->name, namestr)) + snprintf(mopts, sizeof(mopts), "none,%s", cc->name); + else + snprintf(mopts, sizeof(mopts), "%s", cc->name); - if (mount("none", prefix, "cgroup", 0, mopts) < 0) { - pr_perror("couldn't mount %s", mopts); - rmdir(prefix); - return -1; - } + if (mkdtemp(prefix) == NULL) { + pr_perror("can't make dir for cg mounts"); + return -1; + } - fd = open_detach_mount(prefix); - if (fd < 0) - return -1; + if (mount("none", prefix, "cgroup", 0, mopts) < 0) { + pr_perror("couldn't mount %s", mopts); + rmdir(prefix); + return -1; + } + + fd = open_detach_mount(prefix); + if (fd < 0) + return -1; + } path_pref_len = snprintf(path, PATH_MAX, "/proc/self/fd/%d", fd); @@ -620,6 +640,7 @@ static int collect_cgroups(struct list_head *ctls) snprintf(path + path_pref_len, PATH_MAX - path_pref_len, "%s", root); ret = ftw(path, add_cgroup, 4); + if (ret < 0) pr_perror("failed walking %s for empty cgroups", path); @@ -1167,10 +1188,12 @@ void fini_cgroup(void) return; close_service_fd(CGROUP_YARD); - if (umount2(cg_yard, MNT_DETACH)) - pr_perror("Unable to umount %s", cg_yard); - if (rmdir(cg_yard)) - pr_perror("Unable to remove %s", cg_yard); + if (!opts.cgroup_yard) { + if (umount2(cg_yard, MNT_DETACH)) + pr_perror("Unable to umount %s", cg_yard); + if (rmdir(cg_yard)) + pr_perror("Unable to remove %s", cg_yard); + } xfree(cg_yard); cg_yard = NULL; } @@ -1652,20 +1675,28 @@ static int 
prepare_cgroup_sfd(CgroupEntry *ce) pr_info("Preparing cgroups yard (cgroups restore mode %#x)\n", opts.manage_cgroups); - off = sprintf(paux, ".criu.cgyard.XXXXXX"); - if (mkdtemp(paux) == NULL) { - pr_perror("Can't make temp cgyard dir"); - return -1; - } + if (opts.cgroup_yard) { + off = sprintf(paux, "%s", opts.cgroup_yard); - cg_yard = xstrdup(paux); - if (!cg_yard) { - rmdir(paux); - return -1; - } + cg_yard = xstrdup(paux); + if (!cg_yard) + return -1; + } else { + off = sprintf(paux, ".criu.cgyard.XXXXXX"); + if (mkdtemp(paux) == NULL) { + pr_perror("Can't make temp cgyard dir"); + return -1; + } - if (make_yard(cg_yard)) - goto err; + cg_yard = xstrdup(paux); + if (!cg_yard) { + rmdir(paux); + return -1; + } + + if (make_yard(cg_yard)) + goto err; + } pr_debug("Opening %s as cg yard\n", cg_yard); i = open(cg_yard, O_DIRECTORY); diff --git a/criu/config.c b/criu/config.c index 3a54afd4b3..1a6d0141d3 100644 --- a/criu/config.c +++ b/criu/config.c @@ -517,6 +517,7 @@ int parse_options(int argc, char **argv, bool *usage_error, { "tls-key", required_argument, 0, 1095}, BOOL_OPT("tls", &opts.tls), {"tls-no-cn-verify", no_argument, &opts.tls_no_cn_verify, true}, + { "cgroup-yard", required_argument, 0, 1096 }, { }, }; @@ -815,6 +816,9 @@ int parse_options(int argc, char **argv, bool *usage_error, case 1095: SET_CHAR_OPTS(tls_key, optarg); break; + case 1096: + SET_CHAR_OPTS(cgroup_yard, optarg); + break; case 'V': pr_msg("Version: %s\n", CRIU_VERSION); if (strcmp(CRIU_GITID, "0")) diff --git a/criu/cr-service.c b/criu/cr-service.c index 0938db02b0..95ba2e5cea 100644 --- a/criu/cr-service.c +++ b/criu/cr-service.c @@ -608,6 +608,9 @@ static int setup_opts_from_req(int sk, CriuOpts *req) goto err; } + if (req->cgroup_yard) + SET_CHAR_OPTS(cgroup_yard, req->cgroup_yard); + if (req->tls_cacert) SET_CHAR_OPTS(tls_cacert, req->tls_cacert); if (req->tls_cacrl) diff --git a/criu/crtools.c b/criu/crtools.c index 97a6d6d6c3..c0de1c03d0 100644 --- a/criu/crtools.c +++ 
b/criu/crtools.c @@ -389,6 +389,10 @@ int main(int argc, char *argv[], char *envp[]) " --cgroup-dump-controller NAME\n" " define cgroup controller to be dumped\n" " and skip anything else present in system\n" +" --cgroup-yard PATH\n" +" instead of trying to mount cgroups in CRIU, provide\n" +" a path to a directory with already created cgroup yard.\n" +" Useful if you don't want to grant CAP_SYS_ADMIN to CRIU\n" " --lsm-profile TYPE:NAME\n" " Specify an LSM profile to be used during restore.\n" " The type can be either 'apparmor' or 'selinux'.\n" diff --git a/criu/image.c b/criu/image.c index c21ac17741..1a484f192a 100644 --- a/criu/image.c +++ b/criu/image.c @@ -191,7 +191,7 @@ int prepare_inventory(InventoryEntry *he) struct dmp_info d; } crt = { .i.pid = &pid }; - pr_info("Perparing image inventory (version %u)\n", CRTOOLS_IMAGES_V1); + pr_info("Preparing image inventory (version %u)\n", CRTOOLS_IMAGES_V1); he->img_version = CRTOOLS_IMAGES_V1_1; he->fdinfo_per_id = true; diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h index c519c740df..5cbc56f95c 100644 --- a/criu/include/cr_options.h +++ b/criu/include/cr_options.h @@ -106,6 +106,7 @@ struct cr_options { char *cgroup_props; char *cgroup_props_file; struct list_head new_cgroup_roots; + char *cgroup_yard; bool autodetect_ext_mounts; int enable_external_sharing; int enable_external_masters; diff --git a/images/rpc.proto b/images/rpc.proto index 15e677a775..c402259acc 100644 --- a/images/rpc.proto +++ b/images/rpc.proto @@ -120,6 +120,7 @@ message criu_opts { optional string tls_key = 57; optional bool tls = 58; optional bool tls_no_cn_verify = 59; + optional string cgroup_yard = 60; /* optional bool check_mounts = 128; */ } diff --git a/lib/c/criu.c b/lib/c/criu.c index 17d5c3983d..14ddff26db 100644 --- a/lib/c/criu.c +++ b/lib/c/criu.c @@ -987,6 +987,19 @@ int criu_local_add_cg_dump_controller(criu_opts *opts, const char *name) return 0; } +int criu_local_add_cg_yard(criu_opts *opts, const char 
*path) +{ + char *new; + + new = strdup(path); + if (!new) + return -ENOMEM; + + free(opts->rpc->cgroup_yard); + opts->rpc->cgroup_yard = new; + return 0; +} + int criu_add_skip_mnt(const char *mnt) { return criu_local_add_skip_mnt(global_opts, mnt); diff --git a/lib/c/criu.h b/lib/c/criu.h index 76f3547fcd..cb37c5291d 100644 --- a/lib/c/criu.h +++ b/lib/c/criu.h @@ -207,6 +207,7 @@ int criu_local_add_irmap_path(criu_opts *opts, const char *path); int criu_local_add_cg_props(criu_opts *opts, const char *stream); int criu_local_add_cg_props_file(criu_opts *opts, const char *path); int criu_local_add_cg_dump_controller(criu_opts *opts, const char *name); +int criu_local_add_cg_yard(criu_opts *opts, const char *path); int criu_local_add_inherit_fd(criu_opts *opts, int fd, const char *key); int criu_local_add_external(criu_opts *opts, const char *key); int criu_local_set_page_server_address_port(criu_opts *opts, const char *address, int port); From 10fb1cf0ccb3fa904c57d512fb37f6ab8b232139 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20C=C5=82api=C5=84ski?= Date: Wed, 14 Aug 2019 21:13:34 +0200 Subject: [PATCH 076/277] test: implement test for new --cgroup-yard option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Cłapiński --- test/zdtm.py | 4 +- test/zdtm/static/Makefile | 3 +- test/zdtm/static/cgroup_yard.c | 1 + test/zdtm/static/cgroup_yard.desc | 7 ++++ test/zdtm/static/cgroup_yard.hook | 70 +++++++++++++++++++++++++++++++ 5 files changed, 82 insertions(+), 3 deletions(-) create mode 120000 test/zdtm/static/cgroup_yard.c create mode 100644 test/zdtm/static/cgroup_yard.desc create mode 100755 test/zdtm/static/cgroup_yard.hook diff --git a/test/zdtm.py b/test/zdtm.py index 9b93a51e73..98d113f5b0 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -2048,7 +2048,7 @@ def print_sep(title, sep="=", width=80): def print_error(line): line = line.rstrip() - print(line) + print(line.encode('utf-8')) if
line.endswith('>'): # combine pie output return True return False @@ -2058,7 +2058,7 @@ def grep_errors(fname): first = True print_next = False before = [] - with open(fname) as fd: + with open(fname, errors='replace') as fd: for l in fd: before.append(l) if len(before) > 5: diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index d8279d6f84..a38482f44e 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -319,7 +319,8 @@ TST_DIR = \ cgroup03 \ cgroup04 \ cgroup_ifpriomap \ - cgroup_stray \ + cgroup_stray \ + cgroup_yard \ unlink_fstat04 \ unlink_fstat041 \ mntns_remap \ diff --git a/test/zdtm/static/cgroup_yard.c b/test/zdtm/static/cgroup_yard.c new file mode 120000 index 0000000000..f3683c2b43 --- /dev/null +++ b/test/zdtm/static/cgroup_yard.c @@ -0,0 +1 @@ +cgroup00.c \ No newline at end of file diff --git a/test/zdtm/static/cgroup_yard.desc b/test/zdtm/static/cgroup_yard.desc new file mode 100644 index 0000000000..8736d6780d --- /dev/null +++ b/test/zdtm/static/cgroup_yard.desc @@ -0,0 +1,7 @@ +{ +'flavor': 'h', +'flags': 'suid', +# We create the external cgroup yard in working directory during --pre-dump +# hook. We have to go up a few directories to find the yard. 
+'opts': '--manage-cgroups --cgroup-yard ../../../../../../external_yard' +} diff --git a/test/zdtm/static/cgroup_yard.hook b/test/zdtm/static/cgroup_yard.hook new file mode 100755 index 0000000000..7ae53342c9 --- /dev/null +++ b/test/zdtm/static/cgroup_yard.hook @@ -0,0 +1,70 @@ +#!/usr/bin/env python + +import sys +import os +import subprocess +import tempfile + +yard = "external_yard" + +if sys.argv[1] == "--pre-dump": + ''' + Create external cgroup yard to be passed to CRIU via --cgroup-yard + ''' + os.mkdir(yard) + with open("/proc/self/cgroup") as f: + for line in f: + cgr = line.split(":")[1] + + if cgr == "": + continue + + if cgr.startswith("name="): + ctrl = cgr[len("name="):] + opts = "none," + cgr + else: + ctrl = cgr + opts = cgr + + os.mkdir(yard + "/" + ctrl) + subprocess.check_call(["mount", "-t", "cgroup", "none", yard + "/" + ctrl, "-o", opts]) + +if sys.argv[1] == "--post-restore": + ''' + Clean up the cgroup yard created during `--pre-dump` + ''' + with open("/proc/self/cgroup") as f: + for line in f: + cgr = line.split(":")[1] + + if cgr == "": + continue + + if cgr.startswith("name="): + ctrl = cgr[len("name="):] + else: + ctrl = cgr + + subprocess.check_call(["umount", yard + "/" + ctrl]) + os.rmdir(yard + "/" + ctrl) + os.rmdir(yard) + +if sys.argv[1] in ["--pre-restore", "--clean"]: + ''' + Clean up the leftover cgroups created by the test + ''' + tname = tempfile.mkdtemp() + subprocess.call(["mount", "-t", "cgroup", "none", tname, "-o", "none,name=zdtmtst"]) + + try: + os.rmdir(os.path.join(tname, "subcg00", "subsubcg")) + except: + pass + + try: + os.rmdir(os.path.join(tname, "subcg00")) + except: + pass + + subprocess.call(["umount", tname]) + os.rmdir(tname) From cc1fcfa648ca925227c2aca6f27dc23310e4584e Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sat, 14 Sep 2019 13:47:06 +0100 Subject: [PATCH 077/277] travis: Install missing diffutils dependency The following tests fail in Fedora rawhide because /usr/bin/diff is missing. 
* zdtm/static/bridge(ns) * zdtm/static/cr_veth(uns) * zdtm/static/macvlan(ns) * zdtm/static/netns(uns) * zdtm/static/netns-nf(ns) * zdtm/static/sit(ns) Signed-off-by: Radostin Stoyanov --- scripts/build/Dockerfile.fedora.tmpl | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/build/Dockerfile.fedora.tmpl b/scripts/build/Dockerfile.fedora.tmpl index b1127c9b23..0500a8fc55 100644 --- a/scripts/build/Dockerfile.fedora.tmpl +++ b/scripts/build/Dockerfile.fedora.tmpl @@ -3,6 +3,7 @@ ARG ENV1=FOOBAR RUN dnf install -y \ ccache \ + diffutils \ findutils \ gcc \ git \ From cf54e13905419ee2d039b17df2062aaf0093484f Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 14 Sep 2019 10:26:22 +0300 Subject: [PATCH 078/277] arch/x86: fpu_state->fpu_state_ia32.xsave has to be 64-byte aligned Before the 5.2 kernel, only fpu_state->fpu_state_64.xsave has to be 64-byte aligned. But starting with the 5.2 kernel, the same is required for fpu_state->fpu_state_ia32.xsave. The behavior was changed in: c2ff9e9a3d9d ("x86/fpu: Merge the two code paths in __fpu__restore_sig()") Signed-off-by: Andrei Vagin --- compel/arch/x86/src/lib/include/uapi/asm/fpu.h | 8 ++++++-- criu/arch/x86/sigframe.c | 6 ++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/compel/arch/x86/src/lib/include/uapi/asm/fpu.h b/compel/arch/x86/src/lib/include/uapi/asm/fpu.h index 509f4488b3..4ff531fb9c 100644 --- a/compel/arch/x86/src/lib/include/uapi/asm/fpu.h +++ b/compel/arch/x86/src/lib/include/uapi/asm/fpu.h @@ -263,7 +263,7 @@ struct xsave_struct_ia32 { struct ymmh_struct ymmh; uint8_t extended_state_area[EXTENDED_STATE_AREA_SIZE]; }; -} __aligned(FXSAVE_ALIGN_BYTES); +}; typedef struct { /* @@ -309,7 +309,11 @@ typedef struct { typedef struct { union { fpu_state_64_t fpu_state_64; - fpu_state_ia32_t fpu_state_ia32; + struct { + /* fpu_state_ia32->xsave has to be 64-byte aligned.
*/ + uint32_t __pad[2]; + fpu_state_ia32_t fpu_state_ia32; + }; }; uint8_t has_fpu; diff --git a/criu/arch/x86/sigframe.c b/criu/arch/x86/sigframe.c index 11b0d640de..33ba14387f 100644 --- a/criu/arch/x86/sigframe.c +++ b/criu/arch/x86/sigframe.c @@ -28,8 +28,14 @@ int sigreturn_prep_fpu_frame(struct rt_sigframe *sigframe, sigframe->native.uc.uc_mcontext.fpstate = (uint64_t)addr; } else if (!sigframe->is_native) { + unsigned long addr = (unsigned long)(void *)&fpu_state->fpu_state_ia32.xsave; sigframe->compat.uc.uc_mcontext.fpstate = (uint32_t)(unsigned long)(void *)&fpu_state->fpu_state_ia32; + if ((addr % 64ul)) { + pr_err("Unaligned address passed: %lx (native %d)\n", + addr, sigframe->is_native); + return -1; + } } return 0; From 463227b8d5cfb91500541c2a98f4f4fa5658469b Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 15 Sep 2019 06:58:15 +0100 Subject: [PATCH 079/277] Introduce flush_early_log_to_stderr destructor Prior to log initialisation, CRIU preserves all (early) log messages in a buffer. In case of error the content of this buffer needs to be printed out (flushed).
Suggested-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: Radostin Stoyanov --- criu/crtools.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/criu/crtools.c b/criu/crtools.c index c0de1c03d0..efa5da0524 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -48,6 +48,13 @@ #include "sysctl.h" #include "img-remote.h" +void flush_early_log_to_stderr() __attribute__((destructor)); + +void flush_early_log_to_stderr(void) +{ + flush_early_log_buffer(STDERR_FILENO); +} + int main(int argc, char *argv[], char *envp[]) { int ret = -1; @@ -96,10 +103,8 @@ int main(int argc, char *argv[], char *envp[]) return cr_service_work(atoi(argv[2])); } - if (check_options()) { - flush_early_log_buffer(STDERR_FILENO); + if (check_options()) return 1; - } if (opts.imgs_dir == NULL) SET_CHAR_OPTS(imgs_dir, "."); From 8d2a56ac4e714fd0c2e35b42dc366b7f0b0f2b6f Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 15 Sep 2019 07:03:57 +0100 Subject: [PATCH 080/277] Convert pr_msg() error messages to pr_err() Print error messages to stderr (instead of stdout). 
Suggested-by: Andrei Vagin Signed-off-by: Radostin Stoyanov --- criu/config.c | 4 ++-- criu/cr-check.c | 2 +- criu/crtools.c | 26 +++++++++++++------------- criu/proc_parse.c | 2 +- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/criu/config.c b/criu/config.c index 1a6d0141d3..c9332203a7 100644 --- a/criu/config.c +++ b/criu/config.c @@ -836,10 +836,10 @@ int parse_options(int argc, char **argv, bool *usage_error, bad_arg: if (idx < 0) /* short option */ - pr_msg("Error: invalid argument for -%c: %s\n", + pr_err("invalid argument for -%c: %s\n", opt, optarg); else /* long option */ - pr_msg("Error: invalid argument for --%s: %s\n", + pr_err("invalid argument for --%s: %s\n", long_opts[idx].name, optarg); return 1; } diff --git a/criu/cr-check.c b/criu/cr-check.c index 75a665cfbf..729b2dc38e 100644 --- a/criu/cr-check.c +++ b/criu/cr-check.c @@ -62,7 +62,7 @@ static int check_tty(void) int ret = -1; if (ARRAY_SIZE(t.c_cc) < TERMIOS_NCC) { - pr_msg("struct termios has %d @c_cc while " + pr_err("struct termios has %d @c_cc while " "at least %d expected.\n", (int)ARRAY_SIZE(t.c_cc), TERMIOS_NCC); diff --git a/criu/crtools.c b/criu/crtools.c index efa5da0524..4d9307e5c1 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -113,7 +113,7 @@ int main(int argc, char *argv[], char *envp[]) SET_CHAR_OPTS(work_dir, opts.imgs_dir); if (optind >= argc) { - pr_msg("Error: command is required\n"); + pr_err("command is required\n"); goto usage; } @@ -121,17 +121,17 @@ int main(int argc, char *argv[], char *envp[]) if (has_exec_cmd) { if (!has_sub_command) { - pr_msg("Error: --exec-cmd requires a command\n"); + pr_err("--exec-cmd requires a command\n"); goto usage; } if (strcmp(argv[optind], "restore")) { - pr_msg("Error: --exec-cmd is available for the restore command only\n"); + pr_err("--exec-cmd is available for the restore command only\n"); goto usage; } if (opts.restore_detach) { - pr_msg("Error: --restore-detached and --exec-cmd cannot be used together\n"); + 
pr_err("--restore-detached and --exec-cmd cannot be used together\n"); goto usage; } @@ -143,7 +143,7 @@ int main(int argc, char *argv[], char *envp[]) } else { /* No subcommands except for cpuinfo and restore --exec-cmd */ if (strcmp(argv[optind], "cpuinfo") && has_sub_command) { - pr_msg("Error: excessive parameter%s for command %s\n", + pr_err("excessive parameter%s for command %s\n", (argc - optind) > 2 ? "s" : "", argv[optind]); goto usage; } @@ -242,7 +242,7 @@ int main(int argc, char *argv[], char *envp[]) if (!strcmp(argv[optind], "image-proxy")) { if (!opts.addr) { - pr_msg("Error: address not specified\n"); + pr_err("address not specified\n"); return 1; } if (!opts.port) @@ -258,7 +258,7 @@ int main(int argc, char *argv[], char *envp[]) if (!strcmp(argv[optind], "cpuinfo")) { if (!argv[optind + 1]) { - pr_msg("Error: cpuinfo requires an action: dump or check\n"); + pr_err("cpuinfo requires an action: dump or check\n"); goto usage; } if (!strcmp(argv[optind + 1], "dump")) @@ -268,17 +268,17 @@ int main(int argc, char *argv[], char *envp[]) } if (!strcmp(argv[optind], "exec")) { - pr_msg("The \"exec\" action is deprecated by the Compel library.\n"); + pr_err("The \"exec\" action is deprecated by the Compel library.\n"); return -1; } if (!strcmp(argv[optind], "show")) { - pr_msg("The \"show\" action is deprecated by the CRIT utility.\n"); - pr_msg("To view an image use the \"crit decode -i $name --pretty\" command.\n"); + pr_err("The \"show\" action is deprecated by the CRIT utility.\n"); + pr_err("To view an image use the \"crit decode -i $name --pretty\" command.\n"); return -1; } - pr_msg("Error: unknown command: %s\n", argv[optind]); + pr_err("unknown command: %s\n", argv[optind]); usage: pr_msg("\n" "Usage:\n" @@ -478,10 +478,10 @@ int main(int argc, char *argv[], char *envp[]) return 0; opt_port_missing: - pr_msg("Error: port not specified\n"); + pr_err("port not specified\n"); return 1; opt_pid_missing: - pr_msg("Error: pid not specified\n"); + 
pr_err("pid not specified\n"); return 1; } diff --git a/criu/proc_parse.c b/criu/proc_parse.c index 0e8b6f209f..97f82ee013 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -932,7 +932,7 @@ int prepare_loginuid(unsigned int value, unsigned int loglevel) if (write(fd, buf, 11) < 0) { print_on_level(loglevel, - "Write %s to /proc/self/loginuid failed: %s", + "Write %s to /proc/self/loginuid failed: %s\n", buf, strerror(errno)); ret = -1; } From 3e03d51ac4f3256b4c93deb31c1800f0b6e4cad8 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sun, 15 Sep 2019 11:49:27 -0700 Subject: [PATCH 081/277] test/cgroup_yard: always clean up a test cgroup yard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Right now it is cleaned up from a post-restore hook, but zdtm.py can be executed with the norst option: $ zdtm.py run -t zdtm/static/cgroup_yard --norst ... OSError: [Errno 17] File exists: 'external_yard' Cc: Michał Cłapiński Signed-off-by: Andrei Vagin --- test/zdtm/static/cgroup_yard.hook | 39 ++++++++----------------------- 1 file changed, 10 insertions(+), 29 deletions(-) diff --git a/test/zdtm/static/cgroup_yard.hook b/test/zdtm/static/cgroup_yard.hook index 7ae53342c9..cc39717070 100755 --- a/test/zdtm/static/cgroup_yard.hook +++ b/test/zdtm/static/cgroup_yard.hook @@ -12,6 +12,7 @@ if sys.argv[1] == "--pre-dump": Create external cgroup yard to be passed to CRIU via --cgroup-yard ''' os.mkdir(yard) + subprocess.check_call(["mount", "-t", "tmpfs", "zdtm_yard", yard]) with open("/proc/self/cgroup") as f: for line in f: cgr = line.split(":")[1] @@ -29,26 +30,6 @@ if sys.argv[1] == "--pre-dump": os.mkdir(yard + "/" + ctrl) subprocess.check_call(["mount", "-t", "cgroup", "none", yard + "/" + ctrl, "-o", opts]) -if sys.argv[1] == "--post-restore": - ''' - Clean up the cgroup yard created during `--pre-dump` - ''' - with open("/proc/self/cgroup") as f: - for line in f: - cgr = line.split(":")[1] - - if cgr == "": - continue - - if
cgr.startswith("name="): - ctrl = cgr[len("name="):] - else: - ctrl = cgr - - subprocess.check_call(["umount", yard + "/" + ctrl]) - os.rmdir(yard + "/" + ctrl) - os.rmdir(yard) - if sys.argv[1] in ["--pre-restore", "--clean"]: ''' Clean up the leftover cgroups created by the test @@ -56,15 +37,15 @@ if sys.argv[1] in ["--pre-restore", "--clean"]: tname = tempfile.mkdtemp() subprocess.call(["mount", "-t", "cgroup", "none", tname, "-o", "none,name=zdtmtst"]) - try: - os.rmdir(os.path.join(tname, "subcg00", "subsubcg")) - except: - pass - - try: - os.rmdir(os.path.join(tname, "subcg00")) - except: - pass + for cg in [os.path.join(tname, "subcg00", "subsubcg"), + os.path.join(tname, "subcg00")]: + if os.access(cg, os.F_OK): + os.rmdir(cg) subprocess.call(["umount", tname]) os.rmdir(tname) + +if sys.argv[1] == "--clean": + if os.access(yard, os.F_OK): + subprocess.call(["umount", "-l", yard]) + os.rmdir(yard) From 78f70bd39b8e6404a77e2c447415386d9ced0a85 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 19 Sep 2019 23:37:57 +0300 Subject: [PATCH 082/277] zdtm/cgroup_yard: create a test cgroup yard from the post-start hook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Right now, it is created from the pre-dump hook, but if the --snap option is set, the test fails: $ python test/zdtm.py run -t zdtm/static/cgroup_yard -f h --snap --iter 3 ...
Running zdtm/static/cgroup_yard.hook(--pre-dump) Traceback (most recent call last): File zdtm/static/cgroup_yard.hook, line 14, in os.mkdir(yard) OSError: [Errno 17] File exists: 'external_yard' Cc: MichaÅ‚ CÅ‚apiÅ„ski Signed-off-by: Andrei Vagin --- test/zdtm/static/cgroup_yard.hook | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/zdtm/static/cgroup_yard.hook b/test/zdtm/static/cgroup_yard.hook index cc39717070..072b9d38d7 100755 --- a/test/zdtm/static/cgroup_yard.hook +++ b/test/zdtm/static/cgroup_yard.hook @@ -7,7 +7,7 @@ import tempfile yard = "external_yard" -if sys.argv[1] == "--pre-dump": +if sys.argv[1] == "--post-start": ''' Create external cgroup yard to be passed to CRIU via --cgroup-yard ''' From bd80be8d06382ed4be47ca26cd90410e829ab98d Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Sat, 21 Sep 2019 13:35:18 +0300 Subject: [PATCH 083/277] cgroup: fixup nits 1) s/\s*$// 2) fix snprintf out of bound access Signed-off-by: Pavel Tikhomirov --- criu/cgroup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/criu/cgroup.c b/criu/cgroup.c index 9f3aef10d1..1be8be234a 100644 --- a/criu/cgroup.c +++ b/criu/cgroup.c @@ -590,12 +590,12 @@ static int collect_cgroups(struct list_head *ctls) if (opts.cgroup_yard) { char dir_path[PATH_MAX]; int off; - + off = snprintf(dir_path, PATH_MAX, "%s/", opts.cgroup_yard); if (strstartswith(cc->name, namestr)) - snprintf(dir_path + off, PATH_MAX, "%s", cc->name + strlen(namestr)); + snprintf(dir_path + off, PATH_MAX - off, "%s", cc->name + strlen(namestr)); else - snprintf(dir_path + off, PATH_MAX, "%s", cc->name); + snprintf(dir_path + off, PATH_MAX - off, "%s", cc->name); fd = open(dir_path, O_RDONLY | O_DIRECTORY, 0); if (fd < 0) { From 7df16f99a4861dbc5937e5bf1bbf1c38a0471320 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 24 Sep 2019 23:36:29 +0300 Subject: [PATCH 084/277] pipe: print pipe_id as unsigned to generate an external pipe name Reported-by: Mr Jenkins 
Signed-off-by: Andrei Vagin --- criu/pipes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/criu/pipes.c b/criu/pipes.c index fd1a7e6bb2..cb5da71de4 100644 --- a/criu/pipes.c +++ b/criu/pipes.c @@ -282,8 +282,8 @@ static char *pipe_d_name(struct file_desc *d, char *buf, size_t s) struct pipe_info *pi; pi = container_of(d, struct pipe_info, d); - if (snprintf(buf, s, "pipe:[%d]", pi->pe->pipe_id) >= s) { - pr_err("Not enough room for pipe %d identifier string\n", + if (snprintf(buf, s, "pipe:[%u]", pi->pe->pipe_id) >= s) { + pr_err("Not enough room for pipe %u identifier string\n", pi->pe->pipe_id); return NULL; } From 18168867ea7f7a7bb5d151a025a6d78db374aac9 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 24 Sep 2019 23:48:15 +0300 Subject: [PATCH 085/277] unix: print inode numbers as unsigned int Reported-by: Mr Jenkins Signed-off-by: Andrei Vagin --- criu/sk-unix.c | 60 +++++++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/criu/sk-unix.c b/criu/sk-unix.c index f0620e6761..f43aa21244 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -130,7 +130,7 @@ static struct unix_sk_listen_icon *lookup_unix_listen_icons(unsigned int peer_in static void show_one_unix(char *act, const struct unix_sk_desc *sk) { - pr_debug("\t%s: ino %d peer_ino %d family %4d type %4d state %2d name %s\n", + pr_debug("\t%s: ino %u peer_ino %u family %4d type %4d state %2d name %s\n", act, sk->sd.ino, sk->peer_ino, sk->sd.family, sk->type, sk->state, sk->name); if (sk->nr_icons) { @@ -143,7 +143,7 @@ static void show_one_unix(char *act, const struct unix_sk_desc *sk) static void show_one_unix_img(const char *act, const UnixSkEntry *e) { - pr_info("\t%s: id %#x ino %d peer %d type %d state %d name %d bytes\n", + pr_info("\t%s: id %#x ino %u peer %u type %d state %d name %d bytes\n", act, e->id, e->ino, e->peer, e->type, e->state, (int)e->name.len); } @@ -426,7 +426,7 @@ static int dump_one_unix_fd(int lfd, 
uint32_t id, const struct fd_parms *p) if (ue->peer) { peer = (struct unix_sk_desc *)lookup_socket(ue->peer, PF_UNIX, 0); if (IS_ERR_OR_NULL(peer)) { - pr_err("Unix socket %d without peer %d\n", + pr_err("Unix socket %u without peer %u\n", ue->ino, ue->peer); goto err; } @@ -437,7 +437,7 @@ static int dump_one_unix_fd(int lfd, uint32_t id, const struct fd_parms *p) */ if (peer->peer_ino != ue->ino) { if (!peer->name) { - pr_err("Unix socket %d with unreachable peer %d (%d)\n", + pr_err("Unix socket %u with unreachable peer %u (%u)\n", ue->ino, ue->peer, peer->peer_ino); goto err; } @@ -513,7 +513,7 @@ static int dump_one_unix_fd(int lfd, uint32_t id, const struct fd_parms *p) ue->peer = e->sk_desc->sd.ino; - pr_debug("\t\tFixed inflight socket %d peer %d)\n", + pr_debug("\t\tFixed inflight socket %u peer %u)\n", ue->ino, ue->peer); } dump: @@ -1383,7 +1383,7 @@ static int keep_deleted(struct unix_sk_info *ui) { int fd = open(ui->name, O_PATH); if (fd < 0) { - pr_perror("ghost: Can't open id %#x ino %d addr %s", + pr_perror("ghost: Can't open id %#x ino %u addr %s", ui->ue->id, ui->ue->ino, ui->name); return -1; } @@ -1409,7 +1409,7 @@ static int bind_on_deleted(int sk, struct unix_sk_info *ui) int ret; if (ui->ue->name.len >= UNIX_PATH_MAX) { - pr_err("ghost: Too long name for socket id %#x ino %d name %s\n", + pr_err("ghost: Too long name for socket id %#x ino %u name %s\n", ui->ue->id, ui->ue->ino, ui->name); return -ENOSPC; } @@ -1424,14 +1424,14 @@ static int bind_on_deleted(int sk, struct unix_sk_info *ui) ret = access(path, R_OK | W_OK | X_OK); if (ret == 0) { ui->ghost_dir_pos = pos - path; - pr_debug("ghost: socket id %#x ino %d name %s detected F_OK %s\n", + pr_debug("ghost: socket id %#x ino %u name %s detected F_OK %s\n", ui->ue->id, ui->ue->ino, ui->name, path); break; } if (errno != ENOENT) { ret = -errno; - pr_perror("ghost: Can't access %s for socket id %#x ino %d name %s", + pr_perror("ghost: Can't access %s for socket id %#x ino %u name %s", path, 
ui->ue->id, ui->ue->ino, ui->name); return ret; } @@ -1441,7 +1441,7 @@ static int bind_on_deleted(int sk, struct unix_sk_info *ui) path[ui->ue->name.len] = '\0'; pos = dirname(path); - pr_debug("ghost: socket id %#x ino %d name %s creating %s\n", + pr_debug("ghost: socket id %#x ino %u name %s creating %s\n", ui->ue->id, ui->ue->ino, ui->name, pos); ret = mkdirpat(AT_FDCWD, pos, 0755); if (ret) { @@ -1471,15 +1471,15 @@ static int bind_on_deleted(int sk, struct unix_sk_info *ui) * clean it up. */ if (unlinkat(AT_FDCWD, path_parked, 0) == 0) - pr_debug("ghost: Unlinked stale socket id %#x ino %d name %s\n", + pr_debug("ghost: Unlinked stale socket id %#x ino %u name %s\n", ui->ue->id, ui->ue->ino, path_parked); if (rename(ui->name, path_parked)) { ret = -errno; - pr_perror("ghost: Can't rename id %#x ino %d addr %s -> %s", + pr_perror("ghost: Can't rename id %#x ino %u addr %s -> %s", ui->ue->id, ui->ue->ino, ui->name, path_parked); return ret; } - pr_debug("ghost: id %#x ino %d renamed %s -> %s\n", + pr_debug("ghost: id %#x ino %u renamed %s -> %s\n", ui->ue->id, ui->ue->ino, ui->name, path_parked); renamed = true; ret = bind(sk, (struct sockaddr *)&addr, @@ -1487,7 +1487,7 @@ static int bind_on_deleted(int sk, struct unix_sk_info *ui) } if (ret < 0) { ret = -errno; - pr_perror("ghost: Can't bind on socket id %#x ino %d addr %s", + pr_perror("ghost: Can't bind on socket id %#x ino %u addr %s", ui->ue->id, ui->ue->ino, ui->name); return ret; } @@ -1499,7 +1499,7 @@ static int bind_on_deleted(int sk, struct unix_sk_info *ui) ret = keep_deleted(ui); if (ret < 0) { - pr_err("ghost: Can't save socket %#x ino %d addr %s into fdstore\n", + pr_err("ghost: Can't save socket %#x ino %u addr %s into fdstore\n", ui->ue->id, ui->ue->ino, ui->name); return -EIO; } @@ -1511,7 +1511,7 @@ static int bind_on_deleted(int sk, struct unix_sk_info *ui) ret = unlinkat(AT_FDCWD, ui->name, 0); if (ret < 0) { ret = -errno; - pr_perror("ghost: Can't unlink socket %#x ino %d addr %s", + 
pr_perror("ghost: Can't unlink socket %#x ino %u addr %s", ui->ue->id, ui->ue->ino, ui->name); return ret; } @@ -1519,12 +1519,12 @@ static int bind_on_deleted(int sk, struct unix_sk_info *ui) if (renamed) { if (rename(path_parked, ui->name)) { ret = -errno; - pr_perror("ghost: Can't rename id %#x ino %d addr %s -> %s", + pr_perror("ghost: Can't rename id %#x ino %u addr %s -> %s", ui->ue->id, ui->ue->ino, path_parked, ui->name); return ret; } - pr_debug("ghost: id %#x ino %d renamed %s -> %s\n", + pr_debug("ghost: id %#x ino %u renamed %s -> %s\n", ui->ue->id, ui->ue->ino, path_parked, ui->name); } @@ -1542,11 +1542,11 @@ static int bind_on_deleted(int sk, struct unix_sk_info *ui) pos = strrchr(path, '/')) { *pos = '\0'; if (rmdir(path)) { - pr_perror("ghost: Can't remove directory %s on id %#x ino %d", + pr_perror("ghost: Can't remove directory %s on id %#x ino %u", path, ui->ue->id, ui->ue->ino); return -1; } - pr_debug("ghost: Removed %s on id %#x ino %d\n", + pr_debug("ghost: Removed %s on id %#x ino %u\n", path, ui->ue->id, ui->ue->ino); } } @@ -1594,13 +1594,13 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui) mutex_lock(mutex_ghost); if (ui->flags & USK_GHOST_FDSTORE) { - pr_debug("ghost: bind id %#x ino %d addr %s\n", + pr_debug("ghost: bind id %#x ino %u addr %s\n", ui->ue->id, ui->ue->ino, ui->name); ret = bind_on_deleted(sk, ui); if (ret) errno = -ret; } else { - pr_debug("bind id %#x ino %d addr %s\n", + pr_debug("bind id %#x ino %u addr %s\n", ui->ue->id, ui->ue->ino, ui->name); ret = bind(sk, (struct sockaddr *)&addr, sizeof(addr.sun_family) + ui->ue->name.len); @@ -1608,7 +1608,7 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui) goto done; } if (ret < 0) { - pr_perror("Can't bind id %#x ino %d addr %s", + pr_perror("Can't bind id %#x ino %u addr %s", ui->ue->id, ui->ue->ino, ui->name); goto done; } @@ -1654,7 +1654,7 @@ static int post_open_interconnected_master(struct unix_sk_info *ui) static void pr_info_opening(const char 
*prefix, struct unix_sk_info *ui, struct fdinfo_list_entry *fle) { - pr_info("Opening %s (stage %d id %#x ino %d peer %d)\n", + pr_info("Opening %s (stage %d id %#x ino %u peer %u)\n", prefix, fle->stage, ui->ue->id, ui->ue->ino, ui->ue->peer); } @@ -1950,7 +1950,7 @@ static char *socket_d_name(struct file_desc *d, char *buf, size_t s) ui = container_of(d, struct unix_sk_info, d); - if (snprintf(buf, s, "socket:[%d]", ui->ue->ino) >= s) { + if (snprintf(buf, s, "socket:[%u]", ui->ue->ino) >= s) { pr_err("Not enough room for unixsk %d identifier string\n", ui->ue->ino); return NULL; @@ -1981,14 +1981,14 @@ static int unlink_sk(struct unix_sk_info *ui) ret = unlinkat(AT_FDCWD, ui->name, 0) ? -1 : 0; if (ret < 0 && errno != ENOENT) { - pr_warn("Can't unlink socket %d peer %d (name %s dir %s)\n", + pr_warn("Can't unlink socket %u peer %u (name %s dir %s)\n", ui->ue->ino, ui->ue->peer, ui->name ? (ui->name[0] ? ui->name : &ui->name[1]) : "-", ui->name_dir ? ui->name_dir : "-"); ret = -errno; goto out; } else if (ret == 0) { - pr_debug("Unlinked socket %d peer %d (name %s dir %s)\n", + pr_debug("Unlinked socket %u peer %u (name %s dir %s)\n", ui->ue->ino, ui->ue->peer, ui->name ? (ui->name[0] ? ui->name : &ui->name[1]) : "-", ui->name_dir ? ui->name_dir : "-"); @@ -2065,7 +2065,7 @@ int unix_prepare_root_shared(void) char tp_name[32]; char st_name[32]; - pr_debug("ghost: id %#x type %s state %s ino %d peer %d address %s\n", + pr_debug("ghost: id %#x type %s state %s ino %u peer %u address %s\n", ui->ue->id, __socket_type_name(ui->ue->type, tp_name), __tcp_state_name(ui->ue->state, st_name), ui->ue->ino, ui->peer ? 
ui->peer->ue->ino : 0, @@ -2113,7 +2113,7 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i) uname = "-"; } - pr_info(" `- Got id %#x ino %d type %s state %s peer %d (name %s%.*s dir %s)\n", + pr_info(" `- Got id %#x ino %u type %s state %s peer %u (name %s%.*s dir %s)\n", ui->ue->id, ui->ue->ino, ___socket_type_name(ui->ue->type), ___tcp_state_name(ui->ue->state), ui->ue->peer, prefix, ulen, uname, ui->name_dir ? ui->name_dir : "-"); @@ -2128,7 +2128,7 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i) if (ui->ue->deleted) { if (!ui->name || !ui->ue->name.len || !ui->name[0]) { - pr_err("No name present, ino %d\n", ui->ue->ino); + pr_err("No name present, ino %u\n", ui->ue->ino); return -1; } From fdbaf4a07ff97c5fa034df45b7b63230159b8261 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Mon, 30 Sep 2019 20:57:08 +0000 Subject: [PATCH 086/277] Cleanup do_full_int80() 1) Instead of tampering with the nr argument, do_full_int80() returns the value of the system call. It also avoids copying all registers back into the syscall_args32 argument after the syscall. 2) Additionally, the registers r12-r15 were added in the list of clobbers as kernels older than v4.4 do not preserve these. 3) Further, GCC uses a 128-byte red-zone as defined in the x86_64 ABI optimizing away the correct position of the %rsp register in leaf-functions. We now avoid tampering with the red-zone, fixing a SIGSEGV when running mmap_bug_test() in debug mode (DEBUG=1). 
Signed-off-by: Nicolas Viennot Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: Andrei Vagin --- criu/arch/x86/crtools.c | 6 ++-- criu/arch/x86/include/asm/compat.h | 51 ++++++++++++++++++++---------- criu/arch/x86/kerndat.c | 4 +-- criu/arch/x86/restorer.c | 3 +- criu/arch/x86/sigaction_compat.c | 6 +--- 5 files changed, 40 insertions(+), 30 deletions(-) diff --git a/criu/arch/x86/crtools.c b/criu/arch/x86/crtools.c index efc23e5fea..e4073c27b6 100644 --- a/criu/arch/x86/crtools.c +++ b/criu/arch/x86/crtools.c @@ -590,8 +590,7 @@ static int get_robust_list32(pid_t pid, uintptr_t head, uintptr_t len) .arg2 = (uint32_t)len, }; - do_full_int80(&s); - return (int)s.nr; + return do_full_int80(&s); } static int set_robust_list32(uint32_t head, uint32_t len) @@ -602,8 +601,7 @@ static int set_robust_list32(uint32_t head, uint32_t len) .arg1 = len, }; - do_full_int80(&s); - return (int)s.nr; + return do_full_int80(&s); } int get_task_futex_robust_list_compat(pid_t pid, ThreadCoreEntry *info) diff --git a/criu/arch/x86/include/asm/compat.h b/criu/arch/x86/include/asm/compat.h index cd1ae472d7..acd552fb38 100644 --- a/criu/arch/x86/include/asm/compat.h +++ b/criu/arch/x86/include/asm/compat.h @@ -38,26 +38,45 @@ struct syscall_args32 { uint32_t nr, arg0, arg1, arg2, arg3, arg4, arg5; }; -static inline void do_full_int80(struct syscall_args32 *args) +static inline uint32_t do_full_int80(struct syscall_args32 *args) { /* - * r8-r11 registers are cleared during returning to userspace - * from syscall - that's x86_64 ABI to avoid leaking kernel - * pointers. + * Kernel older than v4.4 do not preserve r8-r15 registers when + * invoking int80, so we need to preserve them. * - * Other than that - we can't use %rbp in clobbers as GCC's inline - * assembly doesn't allow to do so. So, here is explicitly saving - * %rbp before syscall and restoring it's value afterward. 
+ * Additionally, %rbp is used as the 6th syscall argument, and we need + * to preserve its value when returning from the syscall to avoid + * upsetting GCC. However, we can't use %rbp in the GCC asm clobbers + * due to a GCC limitation. Instead, we explicitly save %rbp on the + * stack before invoking the syscall and restore its value afterward. + * + * Further, GCC may not adjust the %rsp pointer when allocating the + * args and ret variables because 1) do_full_int80() is a leaf + * function, and 2) the local variables (args and ret) are in the + * 128-byte red-zone as defined in the x86_64 ABI. To use the stack + * when preserving %rbp, we must either tell GCC to a) mark the + * function as non-leaf, or b) move away from the red-zone when using + * the stack. It seems that there is no easy way to do a), so we'll go + * with b). + * Note 1: Another workaround would have been to add %rsp in the list + * of clobbers, but this was deprecated in GCC 9. + * Note 2: This red-zone bug only manifests when compiling CRIU with + * DEBUG=1. 
*/ - asm volatile ("pushq %%rbp\n\t" - "mov %6, %%ebp\n\t" - "int $0x80\n\t" - "mov %%ebp, %6\n\t" - "popq %%rbp\n\t" - : "+a" (args->nr), - "+b" (args->arg0), "+c" (args->arg1), "+d" (args->arg2), - "+S" (args->arg3), "+D" (args->arg4), "+g" (args->arg5) - : : "r8", "r9", "r10", "r11"); + uint32_t ret; + + asm volatile ("sub $128, %%rsp\n\t" + "pushq %%rbp\n\t" + "mov %7, %%ebp\n\t" + "int $0x80\n\t" + "popq %%rbp\n\t" + "add $128, %%rsp\n\t" + : "=a" (ret) + : "a" (args->nr), + "b" (args->arg0), "c" (args->arg1), "d" (args->arg2), + "S" (args->arg3), "D" (args->arg4), "g" (args->arg5) + : "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"); + return ret; } #ifndef CR_NOGLIBC diff --git a/criu/arch/x86/kerndat.c b/criu/arch/x86/kerndat.c index f7593251b2..94c954e1e4 100644 --- a/criu/arch/x86/kerndat.c +++ b/criu/arch/x86/kerndat.c @@ -75,9 +75,7 @@ void *mmap_ia32(void *addr, size_t len, int prot, s.arg4 = fildes; s.arg5 = (uint32_t)off; - do_full_int80(&s); - - return (void *)(uintptr_t)s.nr; + return (void *)(uintptr_t)do_full_int80(&s); } /* diff --git a/criu/arch/x86/restorer.c b/criu/arch/x86/restorer.c index 2d335d5e1d..b2c3b3668a 100644 --- a/criu/arch/x86/restorer.c +++ b/criu/arch/x86/restorer.c @@ -54,8 +54,7 @@ int set_compat_robust_list(uint32_t head_ptr, uint32_t len) .arg1 = len, }; - do_full_int80(&s); - return (int)s.nr; + return do_full_int80(&s); } static int prepare_stack32(void **stack32) diff --git a/criu/arch/x86/sigaction_compat.c b/criu/arch/x86/sigaction_compat.c index b38ba80118..f467da490e 100644 --- a/criu/arch/x86/sigaction_compat.c +++ b/criu/arch/x86/sigaction_compat.c @@ -28,7 +28,6 @@ extern char restore_rt_sigaction; */ int arch_compat_rt_sigaction(void *stack32, int sig, rt_sigaction_t_compat *act) { - int ret; struct syscall_args32 arg = {}; unsigned long act_stack = (unsigned long)stack32; @@ -49,8 +48,5 @@ int arch_compat_rt_sigaction(void *stack32, int sig, rt_sigaction_t_compat *act) arg.arg2 = 0; /* oldact */ arg.arg3 = 
(uint32_t)sizeof(act->rt_sa_mask); /* sigsetsize */ - do_full_int80(&arg); - asm volatile ("\t movl %%eax,%0\n" : "=r"(ret)); - return ret; + return do_full_int80(&arg); } - From 36e1da911ac1e9e80c37e45e9d99d6f4a9311e5b Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 4 Oct 2019 16:32:48 +0100 Subject: [PATCH 087/277] restorer/inotify: Don't overflow PIE stack PATH_MAX == 4096; PATH_MAX*8 == 32k; RESTORE_STACK_SIZE == 32k. Fixes: a3cdf948699c6 ("inotify: cleanup auxiliary events from queue") Cc: Pavel Tikhomirov Cc: Andrei Vagin Co-debugged-with: Andrei Vagin Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- criu/pie/restorer.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c index 390c0e1a9a..dab58add6a 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -1320,21 +1320,23 @@ static int fd_poll(int inotify_fd) } /* - * note: Actually kernel may want even more space for one event (see - * round_event_name_len), so using buffer of EVENT_BUFF_SIZE size may fail. - * To be on the safe side - take a bigger buffer, and these also allows to - * read more events in one syscall. + * In the worst case buf size should be: + * sizeof(struct inotify_event) * 2 + PATH_MAX + * See round_event_name_len() in kernel. 
*/ -#define EVENT_BUFF_SIZE ((sizeof(struct inotify_event) + PATH_MAX)) +#define EVENT_BUFF_SIZE ((sizeof(struct inotify_event) * 2 + PATH_MAX)) /* * Read all available events from inotify queue */ static int cleanup_inotify_events(int inotify_fd) { - char buf[EVENT_BUFF_SIZE * 8]; + char buf[EVENT_BUFF_SIZE * 3]; int ret; + /* Limit buf to be less than half of restorer's stack */ + BUILD_BUG_ON(ARRAY_SIZE(buf) >= RESTORE_STACK_SIZE/2); + while (1) { ret = fd_poll(inotify_fd); if (ret < 0) { From e515ef07252c7a5b3234baf1eb1a733cce0904e9 Mon Sep 17 00:00:00 2001 From: Abhishek Dubey Date: Thu, 3 Oct 2019 19:39:18 +0530 Subject: [PATCH 088/277] Adding --pre-dump-mode option Two modes of pre-dump algorithm: 1) splicing memory by parasite --pre-dump-mode=splice (default) 2) using process_vm_readv syscall --pre-dump-mode=read Signed-off-by: Abhishek Dubey Signed-off-by: Andrei Vagin --- Documentation/criu.txt | 6 ++++++ criu/config.c | 10 ++++++++++ criu/cr-service.c | 13 +++++++++++++ criu/crtools.c | 2 ++ criu/include/cr_options.h | 7 +++++++ criu/mem.c | 13 ++++++++++++- images/rpc.proto | 6 ++++++ lib/c/criu.c | 15 +++++++++++++++ lib/c/criu.h | 7 +++++++ test/zdtm.py | 9 ++++++++- 10 files changed, 86 insertions(+), 2 deletions(-) diff --git a/Documentation/criu.txt b/Documentation/criu.txt index 28913a7fb8..2729bc95a3 100644 --- a/Documentation/criu.txt +++ b/Documentation/criu.txt @@ -156,6 +156,12 @@ In addition, *page-server* options may be specified. Turn on memory changes tracker in the kernel. If the option is not passed the memory tracker get turned on implicitly. +*--pre-dump-mode*='mode':: + There are two 'mode' values for the pre-dump algorithm. The 'splice' mode + is parasite based, whereas the 'read' mode is based on the process_vm_readv + syscall. The 'read' mode incurs reduced frozen time and reduced + memory pressure as compared to 'splice' mode. Default is 'splice' mode. + *dump* ~~~~~~ Performs a checkpoint procedure. 
diff --git a/criu/config.c b/criu/config.c index c9332203a7..1b921d4549 100644 --- a/criu/config.c +++ b/criu/config.c @@ -276,6 +276,7 @@ void init_opts(void) opts.empty_ns = 0; opts.status_fd = -1; opts.log_level = DEFAULT_LOGLEVEL; + opts.pre_dump_mode = PRE_DUMP_SPLICE; } bool deprecated_ok(char *what) @@ -518,6 +519,7 @@ int parse_options(int argc, char **argv, bool *usage_error, BOOL_OPT("tls", &opts.tls), {"tls-no-cn-verify", no_argument, &opts.tls_no_cn_verify, true}, { "cgroup-yard", required_argument, 0, 1096 }, + { "pre-dump-mode", required_argument, 0, 1097}, { }, }; @@ -819,6 +821,14 @@ int parse_options(int argc, char **argv, bool *usage_error, case 1096: SET_CHAR_OPTS(cgroup_yard, optarg); break; + case 1097: + if (!strcmp("read", optarg)) { + opts.pre_dump_mode = PRE_DUMP_READ; + } else if (strcmp("splice", optarg)) { + pr_err("Unable to parse value of --pre-dump-mode\n"); + return 1; + } + break; case 'V': pr_msg("Version: %s\n", CRIU_VERSION); if (strcmp(CRIU_GITID, "0")) diff --git a/criu/cr-service.c b/criu/cr-service.c index 95ba2e5cea..392e9ac50c 100644 --- a/criu/cr-service.c +++ b/criu/cr-service.c @@ -473,6 +473,19 @@ static int setup_opts_from_req(int sk, CriuOpts *req) opts.lazy_pages = req->lazy_pages; } + if (req->has_pre_dump_mode) { + switch (req->pre_dump_mode) { + case CRIU_PRE_DUMP_MODE__SPLICE: + opts.pre_dump_mode = PRE_DUMP_SPLICE; + break; + case CRIU_PRE_DUMP_MODE__READ: + opts.pre_dump_mode = PRE_DUMP_READ; + break; + default: + goto err; + } + } + if (req->ps) { opts.port = (short)req->ps->port; diff --git a/criu/crtools.c b/criu/crtools.c index 4d9307e5c1..1bf2d98c35 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -451,6 +451,8 @@ int main(int argc, char *argv[], char *envp[]) " pages images of previous dump\n" " when used on restore, as soon as page is restored, it\n" " will be punched from the image\n" +" --pre-dump-mode splice - parasite based pre-dumping (default)\n" +" read - process_vm_readv syscall based 
pre-dumping\n" "\n" "Page/Service server options:\n" " --address ADDR address of server or service\n" diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h index 5cbc56f95c..53bd5edcbb 100644 --- a/criu/include/cr_options.h +++ b/criu/include/cr_options.h @@ -38,6 +38,12 @@ struct cg_root_opt { char *newroot; }; +/* + * Pre-dump variants + */ +#define PRE_DUMP_SPLICE 1 /* Pre-dump using parasite */ +#define PRE_DUMP_READ 2 /* Pre-dump using process_vm_readv syscall */ + /* * Cgroup management options. */ @@ -81,6 +87,7 @@ struct cr_options { int evasive_devices; int link_remap_ok; int log_file_per_pid; + int pre_dump_mode; bool swrk_restore; char *output; char *root; diff --git a/criu/mem.c b/criu/mem.c index de66a62104..911b9d21c6 100644 --- a/criu/mem.c +++ b/criu/mem.c @@ -482,7 +482,18 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, if (mdc->lazy) memcpy(pargs_iovs(args), pp->iovs, sizeof(struct iovec) * pp->nr_iovs); - ret = drain_pages(pp, ctl, args); + + /* + * Faking drain_pages for pre-dump here. Actual drain_pages for pre-dump + * will happen after task unfreezing in cr_pre_dump_finish(). This is + * actual optimization which reduces time for which process was frozen + * during pre-dump. 
+ */ + if (mdc->pre_dump && opts.pre_dump_mode == PRE_DUMP_READ) + ret = 0; + else + ret = drain_pages(pp, ctl, args); + if (!ret && !mdc->pre_dump) ret = xfer_pages(pp, &xfer); if (ret) diff --git a/images/rpc.proto b/images/rpc.proto index c402259acc..fc2f1bce28 100644 --- a/images/rpc.proto +++ b/images/rpc.proto @@ -47,6 +47,11 @@ enum criu_cg_mode { DEFAULT = 6; }; +enum criu_pre_dump_mode { + SPLICE = 1; + READ = 2; +}; + message criu_opts { required int32 images_dir_fd = 1; optional int32 pid = 2; /* if not set on dump, will dump requesting process */ @@ -121,6 +126,7 @@ message criu_opts { optional bool tls = 58; optional bool tls_no_cn_verify = 59; optional string cgroup_yard = 60; + optional criu_pre_dump_mode pre_dump_mode = 61 [default = SPLICE]; /* optional bool check_mounts = 128; */ } diff --git a/lib/c/criu.c b/lib/c/criu.c index 14ddff26db..fffb9fd9c4 100644 --- a/lib/c/criu.c +++ b/lib/c/criu.c @@ -336,6 +336,21 @@ int criu_set_parent_images(const char *path) return criu_local_set_parent_images(global_opts, path); } +int criu_local_set_pre_dump_mode(criu_opts *opts, enum criu_pre_dump_mode mode) +{ + opts->rpc->has_pre_dump_mode = true; + if (mode == CRIU_PRE_DUMP_SPLICE || mode == CRIU_PRE_DUMP_READ) { + opts->rpc->pre_dump_mode = mode; + return 0; + } + return -1; +} + +int criu_set_pre_dump_mode(enum criu_pre_dump_mode mode) +{ + return criu_local_set_pre_dump_mode(global_opts, mode); +} + void criu_local_set_track_mem(criu_opts *opts, bool track_mem) { opts->rpc->has_track_mem = true; diff --git a/lib/c/criu.h b/lib/c/criu.h index cb37c5291d..22db0fdcfd 100644 --- a/lib/c/criu.h +++ b/lib/c/criu.h @@ -43,6 +43,11 @@ enum criu_cg_mode { CRIU_CG_MODE_DEFAULT, }; +enum criu_pre_dump_mode { + CRIU_PRE_DUMP_SPLICE = 1, + CRIU_PRE_DUMP_READ = 2 +}; + int criu_set_service_address(const char *path); void criu_set_service_fd(int fd); int criu_set_service_binary(const char *path); @@ -95,6 +100,7 @@ int criu_add_irmap_path(const char *path); int 
criu_add_inherit_fd(int fd, const char *key); int criu_add_external(const char *key); int criu_set_page_server_address_port(const char *address, int port); +int criu_set_pre_dump_mode(enum criu_pre_dump_mode mode); /* * The criu_notify_arg_t na argument is an opaque @@ -211,6 +217,7 @@ int criu_local_add_cg_yard(criu_opts *opts, const char *path); int criu_local_add_inherit_fd(criu_opts *opts, int fd, const char *key); int criu_local_add_external(criu_opts *opts, const char *key); int criu_local_set_page_server_address_port(criu_opts *opts, const char *address, int port); +int criu_local_set_pre_dump_mode(criu_opts *opts, enum criu_pre_dump_mode mode); void criu_local_set_notify_cb(criu_opts *opts, int (*cb)(char *action, criu_notify_arg_t na)); diff --git a/test/zdtm.py b/test/zdtm.py index 98d113f5b0..7fdb8a3140 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -1020,6 +1020,7 @@ def __init__(self, opts): self.__tls = self.__tls_options() if opts['tls'] else [] self.__criu_bin = opts['criu_bin'] self.__crit_bin = opts['crit_bin'] + self.__pre_dump_mode = opts['pre_dump_mode'] def fini(self): if self.__lazy_migrate: @@ -1276,6 +1277,8 @@ def dump(self, action, opts=[]): a_opts += ['--leave-stopped'] if self.__empty_ns: a_opts += ['--empty-ns', 'net'] + if self.__pre_dump_mode: + a_opts += ["--pre-dump-mode", "%s" % self.__pre_dump_mode] nowait = False if self.__lazy_migrate and action == "dump": @@ -1865,7 +1868,7 @@ def run_test(self, name, desc, flavor): 'sat', 'script', 'rpc', 'lazy_pages', 'join_ns', 'dedup', 'sbs', 'freezecg', 'user', 'dry_run', 'noauto_dedup', 'remote_lazy_pages', 'show_stats', 'lazy_migrate', 'remote', - 'tls', 'criu_bin', 'crit_bin') + 'tls', 'criu_bin', 'crit_bin', 'pre_dump_mode') arg = repr((name, desc, flavor, {d: self.__opts[d] for d in nd})) if self.__use_log: @@ -2512,6 +2515,10 @@ def clean_stuff(opts): rp.add_argument("--crit-bin", help="Path to crit binary", default='../crit/crit') +rp.add_argument("--pre-dump-mode", + help="Use 
splice or read mode of pre-dumping", + choices=['splice', 'read'], + default='splice') lp = sp.add_parser("list", help="List tests") lp.set_defaults(action=list_tests) From 1c673f399f19fd3d4861c3317eb26713517e8583 Mon Sep 17 00:00:00 2001 From: Abhishek Dubey Date: Thu, 3 Oct 2019 19:39:19 +0530 Subject: [PATCH 089/277] Handling iov generation for non-PROT_READ regions Skip iov-generation for regions not having PROT_READ, since process_vm_readv syscall can't process them during "read" pre-dump. Handle random order of "read" & "splice" pre-dumps. Signed-off-by: Abhishek Dubey Signed-off-by: Andrei Vagin --- criu/cr-dump.c | 5 ++++ criu/mem.c | 56 ++++++++++++++++++++++++++++++++++++++++-- images/inventory.proto | 1 + 3 files changed, 60 insertions(+), 2 deletions(-) diff --git a/criu/cr-dump.c b/criu/cr-dump.c index 119c82d39f..03f6487eae 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -1486,6 +1486,9 @@ static int cr_pre_dump_finish(int status) if (ret) goto err; + he.has_pre_dump_mode = true; + he.pre_dump_mode = opts.pre_dump_mode; + pstree_switch_state(root_item, TASK_ALIVE); timing_stop(TIME_FROZEN); @@ -1930,6 +1933,8 @@ int cr_dump_tasks(pid_t pid) if (ret) goto err; + he.has_pre_dump_mode = false; + ret = write_img_inventory(&he); if (ret) goto err; diff --git a/criu/mem.c b/criu/mem.c index 911b9d21c6..a5de237555 100644 --- a/criu/mem.c +++ b/criu/mem.c @@ -351,7 +351,8 @@ static int generate_vma_iovs(struct pstree_item *item, struct vma_area *vma, struct page_pipe *pp, struct page_xfer *xfer, struct parasite_dump_pages_args *args, struct parasite_ctl *ctl, pmc_t *pmc, - bool has_parent, bool pre_dump) + bool has_parent, bool pre_dump, + int parent_predump_mode) { u64 off = 0; u64 *map; @@ -361,6 +362,52 @@ static int generate_vma_iovs(struct pstree_item *item, struct vma_area *vma, !vma_area_is(vma, VMA_ANON_SHARED)) return 0; + /* + * To facilitate any combination of pre-dump modes to run after + * one another, we need to take extra care as discussed 
below. + * + * The SPLICE mode pre-dump processes all types of memory regions, + * whereas READ mode pre-dump skips processing those memory regions + * which lack the PROT_READ flag. + * + * Now on mixing pre-dump modes: + * If SPLICE mode follows SPLICE mode : no issue + * -> everything dumped both times + * + * If READ mode follows READ mode : no issue + * -> non-PROT_READ skipped both times + * + * If READ mode follows SPLICE mode : no issue + * -> everything dumped at first, + * the non-PROT_READ skipped later + * + * If SPLICE mode follows READ mode : Need special care + * + * If READ pre-dump happens first, then it has skipped processing + * non-PROT_READ regions. Following SPLICE pre-dump expects pagemap + * entries for all mappings in parent pagemap, but last READ mode + * pre-dump cycle has skipped processing & pagemap generation for + * non-PROT_READ regions. So SPLICE mode throws error of missing + * pagemap entry for encountered non-PROT_READ mapping. + * + * To resolve this, the pre-dump-mode is stored in current pre-dump's + * inventory file. This pre-dump mode is read back from this file + * (present in parent pre-dump dir) as parent-pre-dump-mode during + * next pre-dump. + * + * If parent-pre-dump-mode and next-pre-dump-mode are in READ-mode -> + * SPLICE-mode order, then SPLICE mode doesn't expect mappings for + * non-PROT_READ regions in parent-image and marks "has_parent=false". 
+ */ + + if (!(vma->e->prot & PROT_READ)) { + if (opts.pre_dump_mode == PRE_DUMP_READ && pre_dump) + return 0; + if ((parent_predump_mode == PRE_DUMP_READ && + opts.pre_dump_mode == PRE_DUMP_SPLICE) || !pre_dump) + has_parent = false; + } + if (vma_entry_is(vma->e, VMA_AREA_AIORING)) { if (pre_dump) return 0; @@ -406,6 +453,7 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, unsigned long pmc_size; int possible_pid_reuse = 0; bool has_parent; + int parent_predump_mode = -1; pr_info("\n"); pr_info("Dumping pages (type: %d pid: %d)\n", CR_FD_PAGES, item->pid->real); @@ -472,9 +520,13 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, */ args->off = 0; has_parent = !!xfer.parent && !possible_pid_reuse; + if(mdc->parent_ie) + parent_predump_mode = mdc->parent_ie->pre_dump_mode; + list_for_each_entry(vma_area, &vma_area_list->h, list) { ret = generate_vma_iovs(item, vma_area, pp, &xfer, args, ctl, - &pmc, has_parent, mdc->pre_dump); + &pmc, has_parent, mdc->pre_dump, + parent_predump_mode); if (ret < 0) goto out_xfer; } diff --git a/images/inventory.proto b/images/inventory.proto index 7bc2b0c022..d1438e8c8c 100644 --- a/images/inventory.proto +++ b/images/inventory.proto @@ -16,4 +16,5 @@ message inventory_entry { optional uint32 root_cg_set = 5; optional lsmtype lsmtype = 6; optional uint64 dump_uptime = 8; + optional uint32 pre_dump_mode = 9; } From bb504a8abb6ab50aa5deefcfbfaf51f1e1cde989 Mon Sep 17 00:00:00 2001 From: Abhishek Dubey Date: Thu, 3 Oct 2019 19:39:20 +0530 Subject: [PATCH 090/277] Skip adding PROT_READ to non-PROT_READ mappings "read" mode pre-dump may fail even after adding PROT_READ flag. Adding PROT_READ works when dumping statically. See added comment for details. 
Signed-off-by: Abhishek Dubey Signed-off-by: Andrei Vagin --- criu/mem.c | 54 +++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 45 insertions(+), 9 deletions(-) diff --git a/criu/mem.c b/criu/mem.c index a5de237555..4e110c9e96 100644 --- a/criu/mem.c +++ b/criu/mem.c @@ -591,13 +591,47 @@ int parasite_dump_pages_seized(struct pstree_item *item, * able to read the memory contents. * * Afterwards -- reprotect memory back. + * + * This step is required for "splice" mode pre-dump and dump. + * Skip this step for "read" mode pre-dump. + * "read" mode pre-dump delegates processing of non-PROT_READ + * regions to dump stage. Adding PROT_READ works fine for + * static processing (target process frozen during pre-dump) + * and fails for dynamic as explained below. + * + * Consider following sequence of instances to reason, why + * not to add PROT_READ in "read" mode pre-dump ? + * + * CRIU- "read" pre-dump Target Process + * + * 1. Creates mapping M + * without PROT_READ + * 2. CRIU freezes target + * process + * 3. Collect the mappings + * 4. Add PROT_READ to M + * (non-PROT_READ region) + * 5. CRIU unfreezes target + * process + * 6. Add flag PROT_READ + * to mapping M + * 7. Revoke flag PROT_READ + * from mapping M + * 8. process_vm_readv tries + * to copy mapping M + * (believing M have + * PROT_READ flag) + * 9. 
syscall fails to copy + * data from M */ - pargs->add_prot = PROT_READ; - ret = compel_rpc_call_sync(PARASITE_CMD_MPROTECT_VMAS, ctl); - if (ret) { - pr_err("Can't dump unprotect vmas with parasite\n"); - return ret; + if (!mdc->pre_dump || opts.pre_dump_mode == PRE_DUMP_SPLICE) { + pargs->add_prot = PROT_READ; + ret = compel_rpc_call_sync(PARASITE_CMD_MPROTECT_VMAS, ctl); + if (ret) { + pr_err("Can't dump unprotect vmas with parasite\n"); + return ret; + } } if (fault_injected(FI_DUMP_PAGES)) { @@ -612,10 +646,12 @@ int parasite_dump_pages_seized(struct pstree_item *item, return ret; } - pargs->add_prot = 0; - if (compel_rpc_call_sync(PARASITE_CMD_MPROTECT_VMAS, ctl)) { - pr_err("Can't rollback unprotected vmas with parasite\n"); - ret = -1; + if (!mdc->pre_dump || opts.pre_dump_mode == PRE_DUMP_SPLICE) { + pargs->add_prot = 0; + if (compel_rpc_call_sync(PARASITE_CMD_MPROTECT_VMAS, ctl)) { + pr_err("Can't rollback unprotected vmas with parasite\n"); + ret = -1; + } } return ret; From cce40196e4a0247c867b53529bed99acd9ca0536 Mon Sep 17 00:00:00 2001 From: Abhishek Dubey Date: Thu, 3 Oct 2019 19:39:21 +0530 Subject: [PATCH 091/277] Adding cnt_sub for stats manipulation adding cnt_sub function (complement of cnt_add). cnt_sub is utilized to decrement stats counter according to skipped page count during "read" mode pre-dump. 
Signed-off-by: Abhishek Dubey Signed-off-by: Andrei Vagin --- criu/include/stats.h | 1 + criu/stats.c | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/criu/include/stats.h b/criu/include/stats.h index bab9a0507c..5d408b7b10 100644 --- a/criu/include/stats.h +++ b/criu/include/stats.h @@ -45,6 +45,7 @@ enum { }; extern void cnt_add(int c, unsigned long val); +extern void cnt_sub(int c, unsigned long val); #define DUMP_STATS 1 #define RESTORE_STATS 2 diff --git a/criu/stats.c b/criu/stats.c index 7410b5ced3..cb528011a8 100644 --- a/criu/stats.c +++ b/criu/stats.c @@ -41,6 +41,18 @@ void cnt_add(int c, unsigned long val) BUG(); } +void cnt_sub(int c, unsigned long val) +{ + if (dstats != NULL) { + BUG_ON(c >= DUMP_CNT_NR_STATS); + dstats->counts[c] -= val; + } else if (rstats != NULL) { + BUG_ON(c >= RESTORE_CNT_NR_STATS); + atomic_sub(val, &rstats->counts[c]); + } else + BUG(); +} + static void timeval_accumulate(const struct timeval *from, const struct timeval *to, struct timeval *res) { From a39e7a96071d312a20a4d717adad3c5a84e2c70a Mon Sep 17 00:00:00 2001 From: Abhishek Dubey Date: Thu, 3 Oct 2019 19:39:22 +0530 Subject: [PATCH 092/277] read mode pre-dump implementation Pre-dump using the process_vm_readv syscall. During frozen state, only iovecs will be generated and draining of memory happens after the task is unfrozen. Pre-dumping of shared memory remains unmodified. 
Signed-off-by: Abhishek Dubey Signed-off-by: Andrei Vagin --- criu/cr-dump.c | 7 +- criu/include/page-xfer.h | 4 + criu/page-xfer.c | 389 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 399 insertions(+), 1 deletion(-) diff --git a/criu/cr-dump.c b/criu/cr-dump.c index 03f6487eae..23399a939e 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -1514,7 +1514,12 @@ static int cr_pre_dump_finish(int status) goto err; mem_pp = dmpi(item)->mem_pp; - ret = page_xfer_dump_pages(&xfer, mem_pp); + + if (opts.pre_dump_mode == PRE_DUMP_READ) + ret = page_xfer_predump_pages(item->pid->real, + &xfer, mem_pp); + else + ret = page_xfer_dump_pages(&xfer, mem_pp); xfer.close(&xfer); diff --git a/criu/include/page-xfer.h b/criu/include/page-xfer.h index fa72273ea0..98061e2d3d 100644 --- a/criu/include/page-xfer.h +++ b/criu/include/page-xfer.h @@ -9,6 +9,9 @@ struct ps_info { extern int cr_page_server(bool daemon_mode, bool lazy_dump, int cfd); +/* User buffer for read-mode pre-dump*/ +#define BUFFER_SIZE (PIPE_MAX_SIZE << PAGE_SHIFT) + /* * page_xfer -- transfer pages into image file. 
* Two images backends are implemented -- local image file @@ -48,6 +51,7 @@ struct page_xfer { extern int open_page_xfer(struct page_xfer *xfer, int fd_type, unsigned long id); struct page_pipe; extern int page_xfer_dump_pages(struct page_xfer *, struct page_pipe *); +extern int page_xfer_predump_pages(int pid, struct page_xfer *, struct page_pipe *); extern int connect_to_page_server_to_send(void); extern int connect_to_page_server_to_recv(int epfd); extern int disconnect_from_page_server(void); diff --git a/criu/page-xfer.c b/criu/page-xfer.c index fe457d2017..ac1cca09e2 100644 --- a/criu/page-xfer.c +++ b/criu/page-xfer.c @@ -6,6 +6,7 @@ #include #include #include +#include #undef LOG_PREFIX #define LOG_PREFIX "page-xfer: " @@ -499,6 +500,394 @@ static inline u32 ppb_xfer_flags(struct page_xfer *xfer, struct page_pipe_buf *p return PE_PRESENT; } +/* + * Optimized pre-dump algorithm + * ============================== + * + * Note: Please refer man(2) page of process_vm_readv syscall. + * + * The following discussion covers the possibly faulty-iov + * locations in an iovec, which hinders process_vm_readv from + * dumping the entire iovec in a single invocation. + * + * Memory layout of target process: + * + * Pages: A B C + * +--------+--------+--------+--------+--------+--------+ + * ||||||||||||||||||||||||||||||||||||||||||||||||||||||| + * +--------+--------+--------+--------+--------+--------+ + * + * Single "iov" representation: {starting_address, length_in_bytes} + * An iovec is array of iov-s. + * + * NOTE: For easy representation and discussion purpose, we carry + * out further discussion at "page granularity". + * length_in_bytes will represent page count in iov instead + * of byte count. Same assumption applies for the syscall's + * return value. Instead of returning the number of bytes + * read, it returns a page count. + * + * For above memory mapping, generated iovec: {A,1}{B,1}{C,4} + * + * This iovec remains unmodified once generated. 
At the same + * time some of memory regions listed in iovec may get modified + * (unmap/change protection) by the target process while syscall + * is trying to dump iovec regions. + * + * Case 1: + * A is unmapped, {A,1} become faulty iov + * + * A B C + * +--------+--------+--------+--------+--------+--------+ + * | |||||||||||||||||||||||||||||||||||||||||||||| + * +--------+--------+--------+--------+--------+--------+ + * ^ ^ + * | | + * start | + * (1) | + * start + * (2) + * + * process_vm_readv will return -1. Increment start pointer(2), + * syscall will process {B,1}{C,4} in one go and copy 5 pages + * to userbuf from iov-B and iov-C. + * + * Case 2: + * B is unmapped, {B,1} become faulty iov + * + * A B C + * +--------+--------+--------+--------+--------+--------+ + * ||||||||| ||||||||||||||||||||||||||||||||||||| + * +--------+--------+--------+--------+--------+--------+ + * ^ ^ + * | | + * start | + * (1) | + * start + * (2) + * + * process_vm_readv will return 1, i.e. page A copied to + * userbuf successfully and syscall stopped, since B got + * unmapped. + * + * Increment the start pointer to C(2) and invoke syscall. + * Userbuf contains 5 pages overall from iov-A and iov-C. + * + * Case 3: + * This case deals with partial unmapping of iov representing + * more than one pagesize region. + * + * Syscall can't process such faulty iov as whole. So we + * process such regions part-by-part and form new sub-iovs + * in aux_iov from successfully processed pages. + * + * + * Part 3.1: + * First page of C is unmapped + * + * A B C + * +--------+--------+--------+--------+--------+--------+ + * |||||||||||||||||| |||||||||||||||||||||||||||| + * +--------+--------+--------+--------+--------+--------+ + * ^ ^ + * | | + * start | + * (1) | + * dummy + * (2) + * + * process_vm_readv will return 2, i.e. pages A and B copied. + * We identify length of iov-C is more than 1 page, that is + * where this case differs from Case 2. 
+ * + * dummy-iov is introduced(2) as: {C+1,3}. dummy-iov can be + * directly placed at next page to failing page. This will copy + * remaining 3 pages from iov-C to userbuf. Finally create + * modified iov entry in aux_iov. Complete aux_iov looks like: + * + * aux_iov: {A,1}{B,1}{C+1,3}* + * + * + * Part 3.2: + * In between page of C is unmapped, let's say third + * + * A B C + * +--------+--------+--------+--------+--------+--------+ + * |||||||||||||||||||||||||||||||||||| |||||||||| + * +--------+--------+--------+--------+--------+--------+ + * ^ ^ + * | |-----------------| | + * start partial_read_bytes | + * (1) | + * dummy + * (2) + * + * process_vm_readv will return 4, i.e. pages A and B copied + * completely and first two pages of C are also copied. + * + * Since iov-C is not processed completely, we need to find + * "partial_read_bytes" count to place our dummy-iov for + * remaining processing of iov-C. This function is performed by + * analyze_iov function. + * + * dummy-iov will be(2): {C+3,1}. dummy-iov will be placed + * next to first failing address to process remaining iov-C. + * New entries in aux_iov will look like: + * + * aux_iov: {A,1}{B,1}{C,2}*{C+3,1}* + */ + +unsigned long handle_faulty_iov(int pid, struct iovec* riov, + unsigned long faulty_index, + struct iovec *bufvec, struct iovec* aux_iov, + unsigned long* aux_len, + unsigned long partial_read_bytes) +{ + /* Handling Case 2*/ + if (riov[faulty_index].iov_len == PAGE_SIZE) { + cnt_sub(CNT_PAGES_WRITTEN, 1); + return 0; + } + + struct iovec dummy; + ssize_t bytes_read; + unsigned long offset = 0; + unsigned long final_read_cnt = 0; + + /* Handling Case 3-Part 3.2*/ + offset = (partial_read_bytes)? 
partial_read_bytes : PAGE_SIZE; + + dummy.iov_base = riov[faulty_index].iov_base + offset; + dummy.iov_len = riov[faulty_index].iov_len - offset; + + if (!partial_read_bytes) + cnt_sub(CNT_PAGES_WRITTEN, 1); + + while (dummy.iov_len) { + + bytes_read = process_vm_readv(pid, bufvec, 1, &dummy, 1, 0); + + if(bytes_read == -1) { + /* Handling faulty page read in faulty iov */ + cnt_sub(CNT_PAGES_WRITTEN, 1); + dummy.iov_base += PAGE_SIZE; + dummy.iov_len -= PAGE_SIZE; + continue; + } + + /* If aux-iov can merge and expand or new entry required */ + if (aux_iov[(*aux_len)-1].iov_base + + aux_iov[(*aux_len)-1].iov_len == dummy.iov_base) + aux_iov[(*aux_len)-1].iov_len += bytes_read; + else { + aux_iov[*aux_len].iov_base = dummy.iov_base; + aux_iov[*aux_len].iov_len = bytes_read; + (*aux_len) += 1; + } + + dummy.iov_base += bytes_read; + dummy.iov_len -= bytes_read; + bufvec->iov_base += bytes_read; + bufvec->iov_len -= bytes_read; + final_read_cnt += bytes_read; + } + + return final_read_cnt; +} + +/* + * This function will position start pointer to the latest + * successfully read iov in iovec. In case of partial read it + * returns partial_read_bytes, otherwise 0. 
+ */ +static unsigned long analyze_iov(ssize_t bytes_read, struct iovec* riov, + unsigned long *index, struct iovec *aux_iov, + unsigned long *aux_len) +{ + ssize_t processed_bytes = 0; + unsigned long partial_read_bytes = 0; + + /* correlating iovs with read bytes */ + while (processed_bytes < bytes_read) { + + processed_bytes += riov[*index].iov_len; + aux_iov[*aux_len].iov_base = riov[*index].iov_base; + aux_iov[*aux_len].iov_len = riov[*index].iov_len; + + (*aux_len) += 1; + (*index) += 1; + } + + /* handling partially processed faulty iov*/ + if (processed_bytes - bytes_read) { + + (*index) -= 1; + + partial_read_bytes = riov[*index].iov_len + - (processed_bytes - bytes_read); + aux_iov[*aux_len-1].iov_len = partial_read_bytes; + } + + return partial_read_bytes; +} + +/* + * This function iterates over complete ppb->iov entries and pass + * them to process_vm_readv syscall. + * + * Since process_vm_readv returns count of successfully read bytes. + * It does not point to iovec entry associated to last successful + * byte read. The correlation between bytes read and corresponding + * iovec is setup through analyze_iov function. + * + * If all iovecs are not processed in one go, it means there exists + * some faulty iov entry(memory mapping modified after it was grabbed) + * in iovec. process_vm_readv syscall stops at such faulty iov and + * skip processing further any entry in iovec. This is handled by + * handle_faulty_iov function. 
+ */ +static long fill_userbuf(int pid, struct page_pipe_buf *ppb, + struct iovec *bufvec, + struct iovec* aux_iov, + unsigned long *aux_len) +{ + struct iovec *riov = ppb->iov; + ssize_t bytes_read; + unsigned long total_read = 0; + unsigned long start = 0; + unsigned long partial_read_bytes = 0; + + while (start < ppb->nr_segs) { + + bytes_read = process_vm_readv(pid, bufvec, 1, &riov[start], + ppb->nr_segs - start, 0); + + if (bytes_read == -1) { + /* Handling Case 1*/ + if (riov[start].iov_len == PAGE_SIZE) { + cnt_sub(CNT_PAGES_WRITTEN, 1); + start += 1; + continue; + } else if (errno == ESRCH) { + pr_debug("Target process PID:%d not found\n", pid); + return ESRCH; + } + } + + partial_read_bytes = 0; + + if (bytes_read > 0) { + partial_read_bytes = analyze_iov(bytes_read, riov, + &start, aux_iov, + aux_len); + bufvec->iov_base += bytes_read; + bufvec->iov_len -= bytes_read; + total_read += bytes_read; + } + + /* + * If all iovs not processed in one go, + * it means some iov in between has failed. + */ + if (start < ppb->nr_segs) + total_read += handle_faulty_iov(pid, riov, start, bufvec, + aux_iov, aux_len, + partial_read_bytes); + + start += 1; + } + + return total_read; +} + +/* + * This function is similar to page_xfer_dump_pages, instead it uses + * auxiliary_iov array for pagemap generation. + * + * The entries of ppb->iov may mismatch with actual process mappings + * present at time of pre-dump. Such entries need to be adjusted as per + * the pages read by process_vm_readv syscall. These adjusted entries + * along with unmodified entries are present in aux_iov array. 
+ */ + +int page_xfer_predump_pages(int pid, struct page_xfer *xfer, + struct page_pipe *pp) +{ + struct page_pipe_buf *ppb; + unsigned int cur_hole = 0, i; + unsigned long ret, bytes_read; + struct iovec bufvec; + + struct iovec aux_iov[PIPE_MAX_SIZE]; + unsigned long aux_len; + + char *userbuf = mmap(NULL, BUFFER_SIZE, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + + if (userbuf == MAP_FAILED) { + pr_perror("Unable to mmap a buffer"); + return -1; + } + + list_for_each_entry(ppb, &pp->bufs, l) { + + aux_len = 0; + bufvec.iov_len = BUFFER_SIZE; + bufvec.iov_base = userbuf; + + bytes_read = fill_userbuf(pid, ppb, &bufvec, aux_iov, &aux_len); + + if (bytes_read == ESRCH) { + munmap(userbuf, BUFFER_SIZE); + return -1; + } + + bufvec.iov_base = userbuf; + bufvec.iov_len = bytes_read; + ret = vmsplice(ppb->p[1], &bufvec, 1, SPLICE_F_NONBLOCK); + + if (ret == -1 || ret != bytes_read) { + pr_err("vmsplice: Failed to splice user buffer to pipe %ld\n", ret); + munmap(userbuf, BUFFER_SIZE); + return -1; + } + + /* generating pagemap */ + for (i = 0; i < aux_len; i++) { + + struct iovec iov = aux_iov[i]; + u32 flags; + + ret = dump_holes(xfer, pp, &cur_hole, iov.iov_base); + if (ret) { + munmap(userbuf, BUFFER_SIZE); + return ret; + } + + BUG_ON(iov.iov_base < (void *)xfer->offset); + iov.iov_base -= xfer->offset; + pr_debug("\t p %p [%u]\n", iov.iov_base, + (unsigned int)(iov.iov_len / PAGE_SIZE)); + + flags = ppb_xfer_flags(xfer, ppb); + + if (xfer->write_pagemap(xfer, &iov, flags)) { + munmap(userbuf, BUFFER_SIZE); + return -1; + } + + if (xfer->write_pages(xfer, ppb->p[0], iov.iov_len)) { + munmap(userbuf, BUFFER_SIZE); + return -1; + } + } + + } + + munmap(userbuf, BUFFER_SIZE); + return dump_holes(xfer, pp, &cur_hole, NULL); +} + int page_xfer_dump_pages(struct page_xfer *xfer, struct page_pipe *pp) { struct page_pipe_buf *ppb; From 0b56cdc31eb9ae8eb028652c59d1ecfcefd09448 Mon Sep 17 00:00:00 2001 From: Abhishek Dubey Date: Thu, 3 Oct 2019 19:39:23 
+0530 Subject: [PATCH 093/277] Refactor time accounting macros refactoring time macros as per read mode pre-dump design. Signed-off-by: Abhishek Dubey Signed-off-by: Andrei Vagin --- criu/cr-dump.c | 7 +++++-- criu/page-xfer.c | 8 ++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/criu/cr-dump.c b/criu/cr-dump.c index 23399a939e..8aabb85b11 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -1515,11 +1515,14 @@ static int cr_pre_dump_finish(int status) mem_pp = dmpi(item)->mem_pp; - if (opts.pre_dump_mode == PRE_DUMP_READ) + if (opts.pre_dump_mode == PRE_DUMP_READ) { + timing_stop(TIME_MEMWRITE); ret = page_xfer_predump_pages(item->pid->real, &xfer, mem_pp); - else + } + else { ret = page_xfer_dump_pages(&xfer, mem_pp); + } xfer.close(&xfer); diff --git a/criu/page-xfer.c b/criu/page-xfer.c index ac1cca09e2..ff7c620bc9 100644 --- a/criu/page-xfer.c +++ b/criu/page-xfer.c @@ -831,6 +831,8 @@ int page_xfer_predump_pages(int pid, struct page_xfer *xfer, list_for_each_entry(ppb, &pp->bufs, l) { + timing_start(TIME_MEMDUMP); + aux_len = 0; bufvec.iov_len = BUFFER_SIZE; bufvec.iov_base = userbuf; @@ -852,6 +854,9 @@ int page_xfer_predump_pages(int pid, struct page_xfer *xfer, return -1; } + timing_stop(TIME_MEMDUMP); + timing_start(TIME_MEMWRITE); + /* generating pagemap */ for (i = 0; i < aux_len; i++) { @@ -882,9 +887,12 @@ int page_xfer_predump_pages(int pid, struct page_xfer *xfer, } } + timing_stop(TIME_MEMWRITE); } munmap(userbuf, BUFFER_SIZE); + timing_start(TIME_MEMWRITE); + return dump_holes(xfer, pp, &cur_hole, NULL); } From 55bcadb9a6884095d4035def19705a597c322685 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Fri, 4 Oct 2019 19:36:37 +0300 Subject: [PATCH 094/277] zdtm: handle --pre-dump-mode in the rpc mode Signed-off-by: Andrei Vagin --- test/zdtm.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/test/zdtm.py b/test/zdtm.py index 7fdb8a3140..c0e3d37e2d 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -912,6 
+912,13 @@ def __set_opts(criu, args, ctx): if arg == '--prev-images-dir': criu.opts.parent_img = args.pop(0) continue + if arg == '--pre-dump-mode': + key = args.pop(0) + mode = crpc.rpc.READ + if key == "splice": + mode = crpc.rpc.SPLICE + criu.opts.pre_dump_mode = mode + continue if arg == '--track-mem': criu.opts.track_mem = True continue @@ -929,7 +936,7 @@ def __set_opts(criu, args, ctx): inhfd.key = key continue - raise test_fail_exc('RPC for %s required' % arg) + raise test_fail_exc('RPC for %s(%s) required' % (arg, args.pop(0))) @staticmethod def run(action, From 4b9bcbac9eca2fba41047f9345996f73172d1893 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Fri, 4 Oct 2019 20:02:07 +0300 Subject: [PATCH 095/277] lib/c: fix a compile time error lib/c/criu.c:343:30: error: implicit conversion from enumeration type 'enum criu_pre_dump_mode' to different enumeration type 'CriuPreDumpMode' (aka 'enum _CriuPreDumpMode') [-Werror,-Wenum-conversion opts->rpc->pre_dump_mode = mode; ~ ^~~~ Signed-off-by: Andrei Vagin --- lib/c/criu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/c/criu.c b/lib/c/criu.c index fffb9fd9c4..1d0a235f40 100644 --- a/lib/c/criu.c +++ b/lib/c/criu.c @@ -340,7 +340,7 @@ int criu_local_set_pre_dump_mode(criu_opts *opts, enum criu_pre_dump_mode mode) { opts->rpc->has_pre_dump_mode = true; if (mode == CRIU_PRE_DUMP_SPLICE || mode == CRIU_PRE_DUMP_READ) { - opts->rpc->pre_dump_mode = mode; + opts->rpc->pre_dump_mode = (CriuPreDumpMode)mode; return 0; } return -1; From 31e47a5deaa8d1ced41e084a9a3a02a7763cae0d Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 5 Oct 2019 22:46:02 +0300 Subject: [PATCH 096/277] criu: use atomic_add instead of atomic_sub atomic_sub isn't defined for all platforms. 
Reported-by: Mr Jenkins Cc: Abhishek Dubey Signed-off-by: Andrei Vagin --- criu/stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/stats.c b/criu/stats.c index cb528011a8..891c378000 100644 --- a/criu/stats.c +++ b/criu/stats.c @@ -48,7 +48,7 @@ void cnt_sub(int c, unsigned long val) dstats->counts[c] -= val; } else if (rstats != NULL) { BUG_ON(c >= RESTORE_CNT_NR_STATS); - atomic_sub(val, &rstats->counts[c]); + atomic_add(-val, &rstats->counts[c]); } else BUG(); } From 141e697860e5cd6ef97257883209ab27bfdf795c Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sun, 6 Oct 2019 01:01:50 +0300 Subject: [PATCH 097/277] image: avoid name conflicts in image files Conflict register for file "sk-opts.proto": READ is already defined in file "rpc.proto". Please fix the conflict by adding package name on the proto file, or use different name for the duplication. Note: enum values appear as siblings of the enum type instead of children of it. https://github.com/checkpoint-restore/criu/issues/815 Signed-off-by: Andrei Vagin --- criu/cr-service.c | 2 +- images/rpc.proto | 2 +- test/zdtm.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/criu/cr-service.c b/criu/cr-service.c index 392e9ac50c..a70f99d71c 100644 --- a/criu/cr-service.c +++ b/criu/cr-service.c @@ -478,7 +478,7 @@ static int setup_opts_from_req(int sk, CriuOpts *req) case CRIU_PRE_DUMP_MODE__SPLICE: opts.pre_dump_mode = PRE_DUMP_SPLICE; break; - case CRIU_PRE_DUMP_MODE__READ: + case CRIU_PRE_DUMP_MODE__VM_READ: opts.pre_dump_mode = PRE_DUMP_READ; break; default: diff --git a/images/rpc.proto b/images/rpc.proto index fc2f1bce28..df1b5aed2a 100644 --- a/images/rpc.proto +++ b/images/rpc.proto @@ -49,7 +49,7 @@ enum criu_cg_mode { enum criu_pre_dump_mode { SPLICE = 1; - READ = 2; + VM_READ = 2; }; message criu_opts { diff --git a/test/zdtm.py b/test/zdtm.py index c0e3d37e2d..25328104ee 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -914,7 +914,7 @@ def __set_opts(criu, 
args, ctx): continue if arg == '--pre-dump-mode': key = args.pop(0) - mode = crpc.rpc.READ + mode = crpc.rpc.VM_READ if key == "splice": mode = crpc.rpc.SPLICE criu.opts.pre_dump_mode = mode From 919f4e7c0a2bf105cc525c2c5005c4cf4d064ce7 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Tue, 1 Oct 2019 20:56:26 +0000 Subject: [PATCH 098/277] Guard against empty file lock status The lock status string may be empty. This can happen when the owner of the lock is invisible from our PID namespace. This unfortunate behavior is fixed in kernels v4.19 and up (see commit 1cf8e5de40) Signed-off-by: Nicolas Viennot Signed-off-by: Andrei Vagin --- criu/proc_parse.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/criu/proc_parse.c b/criu/proc_parse.c index 97f82ee013..d67392a120 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -1669,17 +1669,27 @@ static int parse_fdinfo_pid_s(int pid, int fd, int type, void *arg) if (fdinfo_field(str, "lock")) { struct file_lock *fl; struct fdinfo_common *fdinfo = arg; + char *flock_status = str+sizeof("lock:\t")-1; if (type != FD_TYPES__UND) continue; + /* + * The lock status can be empty when the owner of the + * lock is invisible from our PID namespace. + * This unfortunate behavior is fixed in kernels v4.19 + * and up (see commit 1cf8e5de40). + */ + if (flock_status[0] == '\0') + continue; + fl = alloc_file_lock(); if (!fl) { pr_perror("Alloc file lock failed!"); goto out; } - if (parse_file_lock_buf(str + 6, fl, 0)) { + if (parse_file_lock_buf(flock_status, fl, 0)) { xfree(fl); goto parse_err; } From 40b573166e72f1debed89cb9896c8ec24b084e76 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sat, 28 Sep 2019 06:59:45 +0100 Subject: [PATCH 099/277] page-pipe: Resize up to PIPE_MAX_SIZE When performing pre-dump we continuously increase the page-pipe size to fit the max amount memory pages in the pipe's buffer. However, we never actually set the pipe's buffer size to max. 
By doing so, we can reduce the number of pipe-s necessary for pre-dump and improve the performance as shown in the example below. For example, let's consider the following process: #include #include #include void main(void) { int i = 0; void *cache = calloc(1, 1024 * 1024 * 1024); while(1) { printf("%d\n", i++); sleep(1); } } stats-dump before this change: frozen_time: 123538 memdump_time: 95344 memwrite_time: 11980078 pages_scanned: 262721 pages_written: 262169 page_pipes: 513 page_pipe_bufs: 519 stats-dump after this change: frozen_time: 83287 memdump_time: 54587 memwrite_time: 12547466 pages_scanned: 262721 pages_written: 262169 page_pipes: 257 page_pipe_bufs: 263 Signed-off-by: Radostin Stoyanov --- criu/page-pipe.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/criu/page-pipe.c b/criu/page-pipe.c index a8216962da..439c180e4f 100644 --- a/criu/page-pipe.c +++ b/criu/page-pipe.c @@ -54,8 +54,12 @@ static inline int ppb_resize_pipe(struct page_pipe_buf *ppb) if (ppb->pages_in + ppb->pipe_off < ppb->pipe_size) return 0; - if (new_size > PIPE_MAX_SIZE) - return 1; + if (new_size > PIPE_MAX_SIZE) { + if (ppb->pipe_size < PIPE_MAX_SIZE) + ppb->pipe_size = PIPE_MAX_SIZE; + else + return 1; + } ret = __ppb_resize_pipe(ppb, new_size); if (ret < 0) From 48ae837908283f444f0ab29ae8800f03eba6223f Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Tue, 30 Apr 2019 11:35:26 +0300 Subject: [PATCH 100/277] cgroup: fix cg_yard leak on error path in prepare_cgroup_sfd Signed-off-by: Pavel Tikhomirov --- criu/cgroup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/criu/cgroup.c b/criu/cgroup.c index 1be8be234a..a66fc960e6 100644 --- a/criu/cgroup.c +++ b/criu/cgroup.c @@ -1730,11 +1730,11 @@ static int prepare_cgroup_sfd(CgroupEntry *ce) pr_debug("\tMaking controller dir %s (%s)\n", paux, opt); if (mkdir(paux, 0700)) { pr_perror("\tCan't make controller dir %s", paux); - return -1; + goto err; } if (mount("none", paux, "cgroup", 
0, opt) < 0) { pr_perror("\tCan't mount controller dir %s", paux); - return -1; + goto err; } } From 196551bffdb7ef64e28e876dccdf02aa4113acc0 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sat, 14 Sep 2019 12:47:14 +0100 Subject: [PATCH 101/277] image-desc: Remove CR_FD_FILE_LOCKS_PID The support for per-pid images with locks has been dropped with commit d040219 ("locks: Drop support for per-pid images with locks") and CR_FD_FILE_LOCKS_PID is not used. Signed-off-by: Radostin Stoyanov --- criu/image-desc.c | 5 ----- criu/include/image-desc.h | 1 - 2 files changed, 6 deletions(-) diff --git a/criu/image-desc.c b/criu/image-desc.c index 053e7af219..81cd074840 100644 --- a/criu/image-desc.c +++ b/criu/image-desc.c @@ -112,9 +112,4 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = { .magic = IRMAP_CACHE_MAGIC, .oflags = O_SERVICE | O_FORCE_LOCAL, }, - - [CR_FD_FILE_LOCKS_PID] = { - .fmt = "filelocks-%u.img", - .magic = FILE_LOCKS_MAGIC, - }, }; diff --git a/criu/include/image-desc.h b/criu/include/image-desc.h index 3135f56b4d..fea80a719b 100644 --- a/criu/include/image-desc.h +++ b/criu/include/image-desc.h @@ -79,7 +79,6 @@ enum { CR_FD_RLIMIT, CR_FD_ITIMERS, CR_FD_POSIX_TIMERS, - CR_FD_FILE_LOCKS_PID, CR_FD_IRMAP_CACHE, CR_FD_CPUINFO, From 91a5cad183b94031a9d996d7b2b595c78a1b9292 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Tue, 8 Oct 2019 21:37:22 +0100 Subject: [PATCH 102/277] service: Use space on stack for msg buffer RPC messages have a fairly small size and using space on the stack might be a better option. This change follows the pattern used with do_pb_read_one() and pb_write_one().
Signed-off-by: Radostin Stoyanov --- criu/cr-service.c | 56 +++++++++++++++++++++++------------------ criu/include/protobuf.h | 7 ++++++ criu/protobuf.c | 7 ------ 3 files changed, 39 insertions(+), 31 deletions(-) diff --git a/criu/cr-service.c b/criu/cr-service.c index a70f99d71c..549b3368b3 100644 --- a/criu/cr-service.c +++ b/criu/cr-service.c @@ -27,6 +27,7 @@ #include "cr-service.h" #include "cr-service-const.h" #include "page-xfer.h" +#include "protobuf.h" #include "net.h" #include "mount.h" #include "filesystems.h" @@ -49,18 +50,21 @@ unsigned int service_sk_ino = -1; static int recv_criu_msg(int socket_fd, CriuReq **req) { - unsigned char *buf; - int len; + u8 local[PB_PKOBJ_LOCAL_SIZE]; + void *buf = (void *)&local; + int len, exit_code = -1; len = recv(socket_fd, NULL, 0, MSG_TRUNC | MSG_PEEK); if (len == -1) { pr_perror("Can't read request"); - return -1; + goto err; } - buf = xmalloc(len); - if (!buf) - return -ENOMEM; + if (len > sizeof(local)) { + buf = xmalloc(len); + if (!buf) + return -ENOMEM; + } len = recv(socket_fd, buf, len, MSG_TRUNC); if (len == -1) { @@ -80,43 +84,47 @@ static int recv_criu_msg(int socket_fd, CriuReq **req) goto err; } - xfree(buf); - return 0; + exit_code = 0; err: - xfree(buf); - return -1; + if (buf != (void *)&local) + xfree(buf); + return exit_code; } static int send_criu_msg_with_fd(int socket_fd, CriuResp *msg, int fd) { - unsigned char *buf; - int len, ret; + u8 local[PB_PKOBJ_LOCAL_SIZE]; + void *buf = (void *)&local; + int len, exit_code = -1; len = criu_resp__get_packed_size(msg); - buf = xmalloc(len); - if (!buf) - return -ENOMEM; + if (len > sizeof(local)) { + buf = xmalloc(len); + if (!buf) + return -ENOMEM; + } if (criu_resp__pack(msg, buf) != len) { pr_perror("Failed packing response"); goto err; } - if (fd >= 0) { - ret = send_fds(socket_fd, NULL, 0, &fd, 1, buf, len); - } else - ret = write(socket_fd, buf, len); - if (ret < 0) { + if (fd >= 0) + exit_code = send_fds(socket_fd, NULL, 0, &fd, 1, buf, len); + 
else + exit_code = write(socket_fd, buf, len); + + if (exit_code < 0) { pr_perror("Can't send response"); goto err; } - xfree(buf); - return 0; + exit_code = 0; err: - xfree(buf); - return -1; + if (buf != (void *)&local) + xfree(buf); + return exit_code; } static int send_criu_msg(int socket_fd, CriuResp *msg) diff --git a/criu/include/protobuf.h b/criu/include/protobuf.h index fb7489e9d4..0b6d8c1505 100644 --- a/criu/include/protobuf.h +++ b/criu/include/protobuf.h @@ -52,4 +52,11 @@ static inline int collect_images(struct collect_image_info **array, unsigned siz return 0; } +/* + * To speed up reading of packed objects + * by providing space on stack, this should + * be more than enough for most objects. + */ +#define PB_PKOBJ_LOCAL_SIZE 1024 + #endif /* __CR_PROTOBUF_H__ */ diff --git a/criu/protobuf.c b/criu/protobuf.c index 8eb73e0198..e68d42b5ca 100644 --- a/criu/protobuf.c +++ b/criu/protobuf.c @@ -20,13 +20,6 @@ #include "protobuf.h" #include "util.h" -/* - * To speed up reading of packed objects - * by providing space on stack, this should - * be more than enough for most objects. - */ -#define PB_PKOBJ_LOCAL_SIZE 1024 - static char *image_name(struct cr_img *img) { int fd = img->_x.fd; From ee963480601a30d5ec2afc1437dbee4eb8a4c77c Mon Sep 17 00:00:00 2001 From: Ashutosh Mehra Date: Fri, 13 Sep 2019 18:47:33 +0000 Subject: [PATCH 103/277] Couple of fixes to build and run libcriu tests libcriu tests are currently broken. This patch fixes couple of issues to allow the building and running libcriu tests. 1. lib/c/criu.h got updated to include version.h which is present at "criu/include", but the command to compile libcriu tests is not specifying "criu/include" in the path to be searched for header files. This resulted in compilation error. 
This can be fixed by adding "-I ../../../../../criu/criu/include" however it causes more problems as "criu/include/fcntl.h" would now hide system defined fcntl.h Solution is to use "-iquote ../../../../../criu/criu/include" which applies only to the quote form of include directive. 2. Secondly, libcriu.so major version got updated to 2 but libcriu/run.sh still assumes version 1. Instead of just updating the version in libcriu/run.sh to 2, this patch updates the libcriu/Makefile to use "CRIU_SO_VERSION_MAJOR" so that future changes to major version of libcriu won't cause same problem again. Signed-off-by: Ashutosh Mehra --- test/others/libcriu/Makefile | 14 ++++++++++++-- test/others/libcriu/run.sh | 5 ++--- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/test/others/libcriu/Makefile b/test/others/libcriu/Makefile index 5289ed15aa..226396e6a0 100644 --- a/test/others/libcriu/Makefile +++ b/test/others/libcriu/Makefile @@ -1,3 +1,5 @@ +include ../../../../criu/Makefile.versions + TESTS += test_sub TESTS += test_self TESTS += test_notify @@ -19,8 +21,16 @@ endef $(foreach t, $(TESTS), $(eval $(call genb, $(t)))) %.o: %.c - gcc -c $^ -I../../../../criu/lib/c/ -I../../../../criu/images/ -o $@ -Werror + gcc -c $^ -iquote ../../../../criu/criu/include -I../../../../criu/lib/c/ -I../../../../criu/images/ -o $@ -Werror -clean: +clean: libcriu_clean rm -rf $(TESTS) $(TESTS:%=%.o) lib.o .PHONY: clean + +libcriu_clean: + rm -f libcriu.so.${CRIU_SO_VERSION_MAJOR} +.PHONY: libcriu_clean + +libcriu: + ln -s ../../../../criu/lib/c/libcriu.so libcriu.so.${CRIU_SO_VERSION_MAJOR} +.PHONY: libcriu diff --git a/test/others/libcriu/run.sh b/test/others/libcriu/run.sh index a99b91e52a..5f692db31b 100755 --- a/test/others/libcriu/run.sh +++ b/test/others/libcriu/run.sh @@ -5,14 +5,13 @@ source ../env.sh || exit 1 echo "== Clean" make clean +make libcriu rm -rf wdir -rm -f ./libcriu.so.1 echo "== Prepare" mkdir -p wdir/i/ echo "== Run tests" -ln -s
../../../../criu/lib/c/libcriu.so libcriu.so.1 export LD_LIBRARY_PATH=. export PATH="`dirname ${BASH_SOURCE[0]}`/../../:$PATH" @@ -40,6 +39,6 @@ run_test test_iters run_test test_errno echo "== Tests done" -unlink libcriu.so.1 +make libcriu_clean [ $RESULT -eq 0 ] && echo "Success" || echo "FAIL" exit $RESULT From 4c1350d692bd695c15535d3e66d3b037c98c6648 Mon Sep 17 00:00:00 2001 From: Ashutosh Mehra Date: Mon, 16 Sep 2019 06:49:07 +0000 Subject: [PATCH 104/277] Enable libcriu testing in travis jobs Updated scripts/travis/travis-tests to run libcriu test. Signed-off-by: Ashutosh Mehra --- scripts/travis/travis-tests | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/travis/travis-tests b/scripts/travis/travis-tests index 980d747348..b2ebe969b5 100755 --- a/scripts/travis/travis-tests +++ b/scripts/travis/travis-tests @@ -161,6 +161,9 @@ ip net add test ./test/zdtm.py run -t zdtm/static/env00 -k always ./test/crit-recode.py +# libcriu testing +make -C test/others/libcriu run + make -C test/others/shell-job if ! [ -x "$(command -v flake8)" ]; then From 870eaa5710f7b32df07c7b48a75257b7dee884f7 Mon Sep 17 00:00:00 2001 From: Ashutosh Mehra Date: Mon, 23 Sep 2019 08:36:12 +0000 Subject: [PATCH 105/277] Add `criu` to PATH env variable in libcriu tests PATH is pointing to incorrect location for `criu` executable causing libcriu tests to fail when running in travis. Also added statements to display log file contents on failure to help in debugging. Signed-off-by: Ashutosh Mehra --- test/others/libcriu/run.sh | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/test/others/libcriu/run.sh b/test/others/libcriu/run.sh index 5f692db31b..bd92f8544b 100755 --- a/test/others/libcriu/run.sh +++ b/test/others/libcriu/run.sh @@ -13,7 +13,7 @@ mkdir -p wdir/i/ echo "== Run tests" export LD_LIBRARY_PATH=. 
-export PATH="`dirname ${BASH_SOURCE[0]}`/../../:$PATH" +export PATH="`dirname ${BASH_SOURCE[0]}`/../../../criu:$PATH" RESULT=0 @@ -21,6 +21,19 @@ function run_test { echo "== Build $1" if ! make $1; then echo "FAIL build $1" + echo "** Output of $1/test.log" + cat wdir/i/$1/test.log + echo "---------------" + if [ -f wdir/i/$1/dump.log ]; then + echo "** Contents of dump.log" + cat wdir/i/$1/dump.log + echo "---------------" + fi + if [ -f wdir/i/$1/restore.log ]; then + echo "** Contents of restore.log" + cat wdir/i/$1/restore.log + echo "---------------" + fi RESULT=1; else echo "== Test $1" From 99049e219d834bc913273ee3a415e9499dc2dfbd Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Tue, 1 Oct 2019 00:29:14 +0100 Subject: [PATCH 106/277] early-log: Print warnings only if the buffer is full I don't see many issues with early-log, so we probably don't need the warning when it was used. Note that after commit 74731d9 ("zdtm: make grep_errors also grep warnings") also warnings are grepped by zdtm.py (and I believe that was an improvement) which prints some bothering lines: > =[log]=> dump/zdtm/static/inotify00/38/1/dump.log > ------------------------ grep Error ------------------------ > (00.000000) Will allow link remaps on FS > (00.000034) Warn (criu/log.c:203): The early log isn't empty > ------------------------ ERROR OVER ------------------------ Instead of decreasing loglevel of the message, improve it by reporting a real issue. 
Cc: Adrian Reber Cc: Pavel Tikhomirov Cc: Radostin Stoyanov Signed-off-by: Dmitry Safonov --- criu/log.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/criu/log.c b/criu/log.c index 8bdf835341..0ee113b91a 100644 --- a/criu/log.c +++ b/criu/log.c @@ -199,8 +199,8 @@ void flush_early_log_buffer(int fd) } pos += hdr->len; } - if (early_log_buf_off) - pr_warn("The early log isn't empty\n"); + if (early_log_buf_off == EARLY_LOG_BUF_LEN) + pr_warn("The early log buffer is full, some messages may have been lost\n"); early_log_buf_off = 0; } From 48c967240bce335897e99513419f2fceaf9bb3ac Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Thu, 24 Oct 2019 19:39:39 +0000 Subject: [PATCH 107/277] Action scripts should be invoked with normal signal behavior Signal masks propagate through execve, so we need to clear them before invoking the action scripts as it may want to handle SIGCHLD, or SIGSEGV. Signed-off-by: Nicolas Viennot --- criu/util.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/criu/util.c b/criu/util.c index f02486a15f..2a3d7abcab 100644 --- a/criu/util.c +++ b/criu/util.c @@ -558,7 +558,7 @@ int cr_system_userns(int in, int out, int err, char *cmd, sigemptyset(&blockmask); sigaddset(&blockmask, SIGCHLD); if (sigprocmask(SIG_BLOCK, &blockmask, &oldmask) == -1) { - pr_perror("Can not set mask of blocked signals"); + pr_perror("Cannot set mask of blocked signals"); return -1; } @@ -567,6 +567,12 @@ int cr_system_userns(int in, int out, int err, char *cmd, pr_perror("fork() failed"); goto out; } else if (pid == 0) { + sigemptyset(&blockmask); + if (sigprocmask(SIG_SETMASK, &blockmask, NULL) == -1) { + pr_perror("Cannot clear blocked signals"); + goto out_chld; + } + if (userns_pid > 0) { if (switch_ns(userns_pid, &user_ns_desc, NULL)) goto out_chld; From 0d6e92ac8c3478aa640e9429fab1cb39d656941f Mon Sep 17 00:00:00 2001 From: Sergey Bronnikov Date: Sun, 3 Nov 2019 13:08:09 +0300 Subject: [PATCH 108/277] Fix broken 
web-links --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 16e8452b55..558e871601 100644 --- a/README.md +++ b/README.md @@ -63,8 +63,8 @@ Linux kernel supporting checkpoint and restore for all the features it provides. looking for contributors of all kinds -- feedback, bug reports, testing, coding, writing, etc. Here are some useful hints to get involved. -* We have both -- [very simple](https://checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Aenhancement) and [more sophisticated](https://checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3A%22new+feature%22) coding tasks; -* CRIU does need [extensive testing](https://checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Atesting); +* We have both -- [very simple](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Aenhancement) and [more sophisticated](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3A%22new+feature%22) coding tasks; +* CRIU does need [extensive testing](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Atesting); * Documentation is always hard, we have [some information](https://criu.org/Category:Empty_articles) that is to be extracted from people's heads into wiki pages as well as [some texts](https://criu.org/Category:Editor_help_needed) that all need to be converted into useful articles; * Feedback is expected on the github issues page and on the [mailing list](https://lists.openvz.org/mailman/listinfo/criu); * For historical reasons we do not accept PRs, instead [patches are welcome](http://criu.org/How_to_submit_patches); From c28e3b8787c8a2e135e443be0433adadf196daab Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 29 Oct 2019 15:17:40 +0100 Subject: [PATCH 109/277] travis: start to use aarch64 hardware With the newly introduced aarch64 at Travis it is possible for the CRIU test-cases to switch to aarch64. 
Travis uses unprivileged LXD containers on aarch64 which blocks many of the kernel interfaces CRIU needs. So for now this only tests building CRIU natively on aarch64 instead of using the Docker+QEMU combination. All tests based on Docker are not working on aarch64 as there currently seems to be a problem with Docker on aarch64. Maybe because of the nesting of Docker in LXD. Signed-off-by: Adrian Reber --- .travis.yml | 11 ++++-- scripts/build/Dockerfile.alpine | 2 +- scripts/build/Dockerfile.centos | 1 + scripts/build/Makefile | 4 +-- scripts/travis/travis-tests | 63 +++++++++++++++++++++------------ 5 files changed, 54 insertions(+), 27 deletions(-) diff --git a/.travis.yml b/.travis.yml index 82ba9fbc8f..4cde9c4fba 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,11 +14,9 @@ env: - TR_ARCH=x86_64 - TR_ARCH=x86_64 CLANG=1 - TR_ARCH=armv7hf - - TR_ARCH=aarch64 - TR_ARCH=ppc64le - TR_ARCH=s390x - TR_ARCH=armv7hf CLANG=1 - - TR_ARCH=aarch64 CLANG=1 - TR_ARCH=ppc64le CLANG=1 - TR_ARCH=alpine CLANG=1 - TR_ARCH=docker-test @@ -27,6 +25,15 @@ env: - TR_ARCH=centos - TR_ARCH=podman-test matrix: + include: + - os: linux + arch: arm64 + env: TR_ARCH=local + dist: bionic + - os: linux + arch: arm64 + env: TR_ARCH=local CLANG=1 + dist: bionic allow_failures: - env: TR_ARCH=docker-test - env: TR_ARCH=fedora-rawhide diff --git a/scripts/build/Dockerfile.alpine b/scripts/build/Dockerfile.alpine index c71a3901f4..70fdf480aa 100644 --- a/scripts/build/Dockerfile.alpine +++ b/scripts/build/Dockerfile.alpine @@ -42,5 +42,5 @@ RUN apk add \ # The rpc test cases are running as user #1000, let's add the user RUN adduser -u 1000 -D test -RUN pip install protobuf ipaddress junit_xml +RUN pip install protobuf ipaddress junit_xml flake8 RUN make -C test/zdtm diff --git a/scripts/build/Dockerfile.centos b/scripts/build/Dockerfile.centos index 2ce40b179b..213be694fb 100644 --- a/scripts/build/Dockerfile.centos +++ b/scripts/build/Dockerfile.centos @@ -23,6 +23,7 @@ RUN yum install -y \
protobuf-devel \ protobuf-python \ python \ + python-flake8 \ python-ipaddress \ python2-future \ python2-junit_xml \ diff --git a/scripts/build/Makefile b/scripts/build/Makefile index bb2e9ca9d2..3d4d91cd57 100644 --- a/scripts/build/Makefile +++ b/scripts/build/Makefile @@ -1,5 +1,5 @@ -QEMU_ARCHES := armv7hf aarch64 ppc64le s390x fedora-rawhide-aarch64 # require qemu -ARCHES := $(QEMU_ARCHES) x86_64 fedora-asan fedora-rawhide centos +QEMU_ARCHES := armv7hf ppc64le s390x fedora-rawhide-aarch64 # require qemu +ARCHES := $(QEMU_ARCHES) aarch64 x86_64 fedora-asan fedora-rawhide centos TARGETS := $(ARCHES) alpine TARGETS_CLANG := $(addsuffix $(TARGETS),-clang) CONTAINER_RUNTIME := docker diff --git a/scripts/travis/travis-tests b/scripts/travis/travis-tests index b2ebe969b5..1f6b19130b 100755 --- a/scripts/travis/travis-tests +++ b/scripts/travis/travis-tests @@ -1,17 +1,31 @@ #!/bin/sh set -x -e -TRAVIS_PKGS="protobuf-c-compiler libprotobuf-c-dev libaio-dev +TRAVIS_PKGS="protobuf-c-compiler libprotobuf-c-dev libaio-dev python-future libgnutls28-dev libgnutls30 libprotobuf-dev protobuf-compiler - libcap-dev libnl-3-dev gcc-multilib gdb bash python-protobuf - libnet-dev util-linux asciidoctor libnl-route-3-dev" + libcap-dev libnl-3-dev gdb bash python-protobuf python-yaml + libnet-dev util-linux asciidoctor libnl-route-3-dev + python-junit.xml python-ipaddress time ccache flake8 + libbsd-dev" + +X86_64_PKGS="gcc-multilib" + +UNAME_M=`uname -m` + +if [ "$UNAME_M" != "x86_64" ]; then + # For Travis only x86_64 seems to be baremetal. Other + # architectures are running in unprivileged LXD containers. + # That seems to block most of CRIU's interfaces. 
+ SKIP_TRAVIS_TEST=1 +fi travis_prep () { [ -n "$SKIP_TRAVIS_PREP" ] && return cd ../../ - service apport stop + # This can fail on aarch64 travis + service apport stop || : CC=gcc # clang support @@ -43,24 +57,41 @@ travis_prep () { sed -i '/security/ d' /etc/apt/sources.list fi + + # Do not install x86_64 specific packages on other architectures + if [ "$UNAME_M" = "x86_64" ]; then + TRAVIS_PKGS="$TRAVIS_PKGS $X86_64_PKGS" + fi + apt-get update -qq apt-get install -qq --no-install-recommends $TRAVIS_PKGS - # travis is based on 14.04 and that does not have python - # packages for future and ipaddress (16.04 has those packages) - pip install junit-xml future ipaddress chmod a+x $HOME } travis_prep -ulimit -c unlimited -echo "|`pwd`/test/abrt.sh %P %p %s %e" > /proc/sys/kernel/core_pattern - export GCOV +$CC --version time make CC="$CC" -j4 +./criu/criu -v4 cpuinfo dump || : +./criu/criu -v4 cpuinfo check || : + +make lint + +# Check that help output fits into 80 columns +WIDTH=$(./criu/criu --help | wc --max-line-length) +if [ "$WIDTH" -gt 80 ]; then + echo "criu --help output does not obey 80 characters line width!" + exit 1 +fi + [ -n "$SKIP_TRAVIS_TEST" ] && return +ulimit -c unlimited + +echo "|`pwd`/test/abrt.sh %P %p %s %e" > /proc/sys/kernel/core_pattern + if [ "${COMPAT_TEST}x" = "yx" ] ; then # Dirty hack to keep both ia32 & x86_64 shared libs on a machine: # headers are probably not compatible, so apt-get doesn't allow @@ -165,15 +196,3 @@ ip net add test make -C test/others/libcriu run make -C test/others/shell-job - -if ! [ -x "$(command -v flake8)" ]; then - pip install flake8 -fi -make lint - -# Check that help output fits into 80 columns -WIDTH=$(./criu/criu --help | wc --max-line-length) -if [ "$WIDTH" -gt 80 ]; then - echo "criu --help output does not obey 80 characters line width!" 
- exit 1 -fi From 2bb53e794d2e9eef17e5e703d6faa60bc3f8104a Mon Sep 17 00:00:00 2001 From: Vitaly Ostrosablin Date: Fri, 1 Nov 2019 09:00:23 +0000 Subject: [PATCH 110/277] test/static:conntracks: Support nftables Update test to support both iptables and nft to create conntrack rules. Signed-off-by: Vitaly Ostrosablin Signed-off-by: Andrei Vagin --- test/zdtm/static/conntracks | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/test/zdtm/static/conntracks b/test/zdtm/static/conntracks index a30e0e2685..26220f97c0 100755 --- a/test/zdtm/static/conntracks +++ b/test/zdtm/static/conntracks @@ -23,7 +23,7 @@ do_or_fail() fail "$failmsg: $output" } -do_start() +do_start_ipt() { [ -f "$statefile" ] && die "state file $statefile aleady exists" @@ -35,7 +35,7 @@ do_start() iptables -L \> "$statefile" } -do_stop() +do_stop_ipt() { do_or_fail "can't compare the iptables" \ iptables -L \| diff -u "$statefile" - @@ -45,6 +45,38 @@ do_stop() echo "PASS" > $outfile } +do_start_nft() +{ + [ -f "$statefile" ] && die "state file $statefile aleady exists" + + do_or_fail "can't install a state match" \ + nft add rule filter INPUT \ + ct state related,established accept + + do_or_fail "can't list the loaded nftables" \ + nft list ruleset \> "$statefile" +} + +do_stop_nft() +{ + do_or_fail "can't compare the nftables" \ + nft list ruleset \| diff -u "$statefile" - + + rm -f "$statefile" + + echo "PASS" > $outfile +} + +do_start() +{ + [ -x "$(command -v nft)" ] && do_start_nft || do_start_ipt +} + +do_stop() +{ + [ -x "$(command -v nft)" ] && do_stop_nft || do_stop_ipt +} + tmpargs="$(../lib/parseargs.sh --name=$0 \ --flags-req=statefile,outfile \ --flags-opt="start,stop" -- "$@")" || From a43ea014260306e35298306da65a3c824c89e4ae Mon Sep 17 00:00:00 2001 From: Nidhi Gupta Date: Fri, 18 Oct 2019 20:09:15 +0530 Subject: [PATCH 111/277] test/java: Add FileRead Tests Signed-off-by: Nidhi Gupta --- test/javaTests/README.md | 33 ++ 
test/javaTests/pom.xml | 47 ++ .../criu/java/tests/CheckpointRestore.java | 450 ++++++++++++++++++ .../src/org/criu/java/tests/FileRead.java | 175 +++++++ .../src/org/criu/java/tests/Helper.java | 99 ++++ .../src/org/criu/java/tests/ImgFilter.java | 11 + test/javaTests/test.xml | 13 + 7 files changed, 828 insertions(+) create mode 100644 test/javaTests/README.md create mode 100644 test/javaTests/pom.xml create mode 100644 test/javaTests/src/org/criu/java/tests/CheckpointRestore.java create mode 100644 test/javaTests/src/org/criu/java/tests/FileRead.java create mode 100644 test/javaTests/src/org/criu/java/tests/Helper.java create mode 100644 test/javaTests/src/org/criu/java/tests/ImgFilter.java create mode 100644 test/javaTests/test.xml diff --git a/test/javaTests/README.md b/test/javaTests/README.md new file mode 100644 index 0000000000..cb779285ed --- /dev/null +++ b/test/javaTests/README.md @@ -0,0 +1,33 @@ +# JavaTests + +Java Functional tests checks the Java File based APIs and Memory mapping APIs by placing the process in various states before checkpointing and validates if these resources are still accessible after restore. It also validates if the file contents are in expected states. + +Tests are to be run by a user having following capabilities: +CAP_DAC_OVERRIDE +CAP_CHOWN +CAP_SETPCAP +CAP_SETGID +CAP_AUDIT_CONTROL +CAP_DAC_READ_SEARCH +CAP_NET_ADMIN +CAP_SYS_ADMIN +CAP_SYS_CHROOT +CAP_SYS_PTRACE +CAP_FOWNER +CAP_KILL +CAP_FSETID +CAP_SYS_RESOURCE +CAP_SETUID + +## File-based Java APIs + +Here we test the File-Based Java APIs by checkpointing the application in the following scenarios and verifying the contents of the file after restore: +- Reading and writing in the same file. 
(FileRead.java) + +### Prerequisites for running the tests: +- Maven + +### To run the tests: +- In the javaTests folder run the command ```sudo mvn test``` +- To keep the img files and logs from previous failures, between different runs of the test, use the ```-DneverCleanFailures=true ``` option in the maven command +as ```sudo mvn -DneverCleanFailures=true test``` diff --git a/test/javaTests/pom.xml b/test/javaTests/pom.xml new file mode 100644 index 0000000000..faae44d1bf --- /dev/null +++ b/test/javaTests/pom.xml @@ -0,0 +1,47 @@ + + 4.0.0 + criu + criu-javaTests + 1 + criu-javaTests + + + src + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.14.1 + + + + test.xml + + + + + + maven-compiler-plugin + 3.1 + + 1.7 + 1.7 + + + + + + + + org.testng + testng + 6.3.1 + + + + UTF-8 + + diff --git a/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java b/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java new file mode 100644 index 0000000000..968488191d --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java @@ -0,0 +1,450 @@ +package org.criu.java.tests; + +import org.testng.Assert; +import org.testng.annotations.AfterTest; +import org.testng.annotations.BeforeSuite; +import org.testng.annotations.Parameters; +import org.testng.annotations.Test; + +import java.io.*; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import java.text.SimpleDateFormat; +import java.util.Date; + +public class CheckpointRestore { + private MappedByteBuffer mappedByteBuffer = null; + private String testName = ""; + private String logFolder = Helper.LOG_FOLDER + "/"; + private String outputFolder = Helper.OUTPUT_FOLDER_NAME + "/"; + + /** + * Create CRlog and output directory if they don't exist. 
+ * Delete directories containing .img files from failed Checkpoint-Restore if 'neverCleanFailures' property is not set to true. + * + * @throws IOException + */ + @BeforeSuite + void suiteSetup() throws IOException { + System.out.println("Tests are to be run as a privileged user having capabilities mentioned in ReadMe"); + boolean neverCleanFailures = Boolean.getBoolean("neverCleanFailures"); + Path logDir = Paths.get(logFolder); + Path outputDir = Paths.get(outputFolder); + if (!Files.exists(logDir)) { + System.out.println("Logs directory does not exist, creating it"); + Files.createDirectory(logDir); + } + if (!Files.exists(outputDir)) { + System.out.println("Output directory does not exist, creating it"); + Files.createDirectory(outputDir); + } + /* + * Delete the directories containing the img files from failed Checkpoint-Restore. + */ + if (!neverCleanFailures) { + File output = new File(outputFolder); + String[] name = output.list(); + for (int i = 0; null != name && i < name.length; i++) { + File testFolder = new File(outputFolder + name[i]); + if (testFolder.isDirectory()) { + String[] list = testFolder.list(); + File file; + if (null != list) { + for (int j = 0; j < list.length; j++) { + file = new File(outputFolder + name[i] + "/" + list[j]); + if (!file.isDirectory()) { + Files.delete(file.toPath()); + } + } + } + } + Files.delete(testFolder.toPath()); + } + } + } + + /** + * Create the output folder for the test in case it does not exist + * + * @param testName Name of the java test + * @throws IOException + */ + private void testSetup(String testName) throws IOException { + Path testFolderPath = Paths.get(outputFolder + testName + "/"); + if (!Files.exists(testFolderPath)) { + System.out.println("Test Folder does not exist creating it"); + Files.createDirectory(testFolderPath); + } + } + + /** + * Read the pid of process from the pid file of test + * + * @param name Name of the java test + * @return pid Process id of the java test process + * @throws 
IOException + */ + private String getPid(String name) throws IOException { + name = outputFolder + testName + "/" + name + Helper.PID_APPEND; + File pidfile = new File(name); + BufferedReader pidReader = new BufferedReader(new FileReader(pidfile)); + String pid = pidReader.readLine(); + pidReader.close(); + return pid; + } + + /** + * @param testName Name of the java test + * @param checkpointOpt Additional options for checkpoint + * @param restoreOpt Additional options for restore + * @throws Exception + */ + @Test + @Parameters({"testname", "checkpointOpt", "restoreOpt"}) + public void runtest(String testName, String checkpointOpt, String restoreOpt) throws Exception { + this.testName = testName; + String name = Helper.PACKAGE_NAME + "." + testName; + String pid; + int exitCode; + + System.out.println("======= Testing " + testName + " ========"); + + testSetup(testName); + + File f = new File(Helper.MEMORY_MAPPED_FILE_NAME); + if (f.exists()) { + f.delete(); + } + + /* + * Create a new file that will be mapped to memory and used to communicate between + * this process and the java test process. + */ + boolean newFile = f.createNewFile(); + Assert.assertTrue(newFile, "Unable to create a new file to be mapped"); + + /* + * MappedByteBuffer communicates between this process and java process called. 
+ */ + FileChannel channel = FileChannel.open(f.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + mappedByteBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + mappedByteBuffer.clear(); + channel.close(); + + /* + * Put MappedByteBuffer in Init state + */ + mappedByteBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_INIT); + + /* + * Run the test as a separate process + */ + System.out.println("Starting the java Test"); + ProcessBuilder builder = new ProcessBuilder("java", "-cp", "target/classes", name); + Process process = builder.start(); + + char currentState = mappedByteBuffer.getChar(Helper.MAPPED_INDEX); + /* + * Loop until the test process changes the state of MappedByteBuffer from init state + */ + while (Helper.STATE_INIT == currentState) { + currentState = mappedByteBuffer.getChar(Helper.MAPPED_INDEX); + } + + /* + * If Mapped Buffer is in Helper.STATE_FAIL state before checkpointing then an exception must + * have occurred in the test. + */ + while (Helper.STATE_FAIL == currentState) { + try { + /* + * We exit the test process with exit code 5 in case of an exception + */ + exitCode = process.exitValue(); + /* + * Reaching here implies that .exitValue() has not thrown an exception, so the process has + * exited, We now check the exitCode. + */ + if (5 == exitCode) { + Assert.fail(testName + ": Exception occurred while running the test: check the log file for details."); + } else { + Assert.fail(testName + ": ERROR: Unexpected value of exit code: " + exitCode + ", expected: 5"); + } + } catch (IllegalThreadStateException e) { + /* + * Do nothing, as an Exception is expected if the process has not exited + * and we try to get its exitValue. 
+ */ + } + + currentState = mappedByteBuffer.getChar(Helper.MAPPED_INDEX); + } + + /* + * Mapped Buffer state should be Helper.STATE_CHECKPOINT for checkpointing or Helper.STATE_END if some error occurs in test + */ + if (Helper.STATE_END != currentState) { + Assert.assertEquals(currentState, Helper.STATE_CHECKPOINT, testName + ": ERROR: Error occurred while running the test: test is not in the excepted 'waiting to be checkpointed state': " + currentState); + } else { + Assert.fail(testName + ": ERROR: Error took place in the test check the log file for more details"); + } + /* + * Reaching here implies that MappedByteBuffer is in To Be Checkpointed state. + * Get the pid of the test process + */ + + pid = getPid(testName); + try { + /* + * Checkpoint the process + */ + checkpoint(pid, checkpointOpt); + + } catch (Exception e) { + /* + * If exception occurs put the MappedByteBuffer to Helper.STATE_TERMINATE-Terminate state. + * On reading the terminate state, the test process terminates, else it + * may go on looping. + */ + mappedByteBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_TERMINATE); + Assert.fail(testName + ": Exception occurred while during checkpointing" + e, e); + } + + /* + * The process has been checkpointed successfully, now restoring the process. 
+ */ + try { + /* + * Restore the process + */ + restore(restoreOpt); + } catch (Exception e) { + mappedByteBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_TERMINATE); + Assert.fail(testName + ": Exception occurred while restoring the test" + e, e); + } + + /* + * Wait for test process to finish + */ + currentState = mappedByteBuffer.getChar(Helper.MAPPED_INDEX); + while (Helper.STATE_RESTORE == currentState) { + currentState = mappedByteBuffer.getChar(Helper.MAPPED_INDEX); + } + + /* + * If a test passes it puts the MappedByteBuffer to Helper.STATE_PASS-Pass state, + * On failing to Helper.STATE_FAIL-Fail state, and if our Buffer is in Helper.STATE_TERMINATE state + * its because the checkpoint-restore of test process failed. + */ + + Assert.assertNotEquals(currentState, Helper.STATE_TERMINATE, testName + ": ERROR: Checkpoint-Restore failed"); + Assert.assertNotEquals(currentState, Helper.STATE_FAIL, testName + ": ERROR: Test Failed, Check Log for details"); + Assert.assertEquals(currentState, Helper.STATE_PASS, testName + " ERROR: Unexpected State of Mapped Buffer"); + System.out.println("-----" + "PASS" + "-----"); + + } + + /** + * Remove .img files, dump.log, restore.log, stats-dump and stats-restore files from Log Directory + * + * @throws IOException + */ + @AfterTest + void cleanup() throws IOException { + int i; + String currentPath = System.getProperty("user.dir"); + currentPath = currentPath + "/" + logFolder; + File deleteFile; + File dir = new File(currentPath); + String[] imgFiles = dir.list(new ImgFilter()); + if (null != imgFiles) { + for (i = 0; i < imgFiles.length; i++) { + deleteFile = new File(currentPath + imgFiles[i]); + Files.delete(deleteFile.toPath()); + } + } + + boolean exists = Files.exists(Paths.get(currentPath + "dump.log")); + if (exists) { + Files.delete(Paths.get(currentPath + "dump.log")); + } + + exists = Files.exists(Paths.get(currentPath + "restore.log")); + if (exists) { + Files.delete(Paths.get(currentPath + "restore.log")); 
+ } + + exists = Files.exists(Paths.get(currentPath + "stats-dump")); + if (exists) { + Files.delete(Paths.get(currentPath + "stats-dump")); + } + + exists = Files.exists(Paths.get(currentPath + "stats-restore")); + if (exists) { + Files.delete(Paths.get(currentPath + "stats-restore")); + } + } + + /** + * Copy .img files, dump.log, restore.log, stats-dump and stats-restore files from Log Directory if they exist + * to another folder. + * + * @throws IOException + */ + String copyFiles() throws IOException { + String currentPath = System.getProperty("user.dir"); + String folderSuffix = new SimpleDateFormat("yyMMddHHmmss").format(new Date()); + String fromPath = currentPath + "/" + logFolder; + File fromDir = new File(fromPath); + Path fromFile, toFile; + boolean exists; + String toPath = currentPath + "/" + outputFolder + testName + folderSuffix + "/"; + Path dirPath = Paths.get(toPath); + Files.createDirectory(dirPath); + + String[] imgFiles = fromDir.list(new ImgFilter()); + if (null != imgFiles) { + for (int i = 0; i < imgFiles.length; i++) { + fromFile = Paths.get(fromPath + imgFiles[i]); + toFile = Paths.get(toPath + imgFiles[i]); + Files.copy(fromFile, toFile); + } + } + + fromFile = Paths.get(fromPath + "dump.log"); + exists = Files.exists(fromFile); + if (exists) { + toFile = Paths.get(toPath + "dump.log"); + Files.copy(fromFile, toFile); + } + + fromFile = Paths.get(fromPath + "restore.log"); + exists = Files.exists(fromFile); + if (exists) { + toFile = Paths.get(toPath + "restore.log"); + Files.copy(fromFile, toFile); + } + + fromFile = Paths.get(fromPath + "stats-dump"); + exists = Files.exists(fromFile); + if (exists) { + toFile = Paths.get(toPath + "stats-dump"); + Files.copy(fromFile, toFile); + } + + fromFile = Paths.get(fromPath + "stats-restore"); + exists = Files.exists(fromFile); + if (exists) { + toFile = Paths.get(toPath + "stats-restore"); + Files.copy(fromFile, toFile); + } + + return folderSuffix; + } + + /** + * Checkpoint the process, if 
process has not been checkpointed correctly + * copy the .img, log and stats files, puts MappedBuffer to 'terminate' state and mark + * test as failed + * + * @param pid Pid of process to be checkpointed + * @param checkpointOpt Additional options for checkpoint + * @throws IOException + * @throws InterruptedException + */ + private void checkpoint(String pid, String checkpointOpt) throws IOException, InterruptedException { + ProcessBuilder builder; + System.out.println("Checkpointing process " + pid); + String command = "../../criu/criu dump --shell-job -t " + pid + " -vvv -D " + logFolder + " -o dump.log"; + if (0 == checkpointOpt.length()) { + String[] cmd = command.split(" "); + builder = new ProcessBuilder(cmd); + } else { + command = command + " " + checkpointOpt; + String[] cmd = command.split(" "); + builder = new ProcessBuilder(cmd); + } + Process process = builder.start(); + BufferedReader stdError = new BufferedReader(new InputStreamReader(process.getErrorStream())); + int exitCode = process.waitFor(); + + if (0 != exitCode) { + /* + * Print the error stream + */ + String line = stdError.readLine(); + while (null != line) { + System.out.println(line); + line = stdError.readLine(); + } + + mappedByteBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_TERMINATE); + /* + * If checkpoint fails copy the img files, dump.log, stats-dump, stats-restore + */ + String folderSuffix = copyFiles(); + + Assert.fail(testName + ": ERROR: Error during checkpoint: exitCode of checkpoint process was not zero.\nFor more details check dump.log in " + outputFolder + testName + folderSuffix); + return; + } + + System.out.println("Checkpoint success"); + process.destroy(); + + } + + /** + * Restore the process, if process has been restored correctly put Mapped Buffer to + * 'restored' state, else copy the .img, log and stats files and put MappedBuffer to 'terminate' + * state and mark test as failed + * + * @param restoreOpt Additional options for restore + * @throws IOException + 
* @throws InterruptedException + */ + private void restore(String restoreOpt) throws IOException, InterruptedException { + ProcessBuilder builder; + System.out.println("Restoring process"); + String command = "../../criu/criu restore -d -vvv --shell-job -D " + logFolder + " -o restore.log"; + if (0 == restoreOpt.length()) { + String[] cmd = command.split(" "); + builder = new ProcessBuilder(cmd); + } else { + command = command + " " + restoreOpt; + String[] cmd = command.split(" "); + builder = new ProcessBuilder(cmd); + } + + Process process = builder.start(); + BufferedReader stdError = new BufferedReader(new InputStreamReader(process.getErrorStream())); + int exitCode = process.waitFor(); + + if (0 != exitCode) { + /* + * Print the error stream + */ + String line = stdError.readLine(); + while (null != line) { + System.out.println(line); + line = stdError.readLine(); + } + mappedByteBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_TERMINATE); + /* + * If restore fails copy img files, dump.log, restore.log, stats-dump, stats-restore + */ + String folderSuffix = copyFiles(); + Assert.fail(testName + ": ERROR: Error during restore: exitCode of restore process was not zero.\nFor more details check restore.log in " + outputFolder + testName + folderSuffix); + + return; + } else { + System.out.println("Restore success"); + mappedByteBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_RESTORE); + } + process.destroy(); + } +} diff --git a/test/javaTests/src/org/criu/java/tests/FileRead.java b/test/javaTests/src/org/criu/java/tests/FileRead.java new file mode 100644 index 0000000000..d94a14112a --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/FileRead.java @@ -0,0 +1,175 @@ +package org.criu.java.tests; + +import java.io.*; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import 
java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class FileRead { + private static String TESTNAME = "FileRead"; + + /** + * @param i int value denoting the line number. + * @return The line as a string. + */ + private static String getLine(int i) { + return "Line No: " + i + "\n"; + } + + /** + * Write in a file, line by line, and read it, checkpoint and restore + * and then continue to read and write the file. + * + * @param args Not used + */ + public static void main(String[] args) { + MappedByteBuffer b = null; + Logger logger = null; + int wi, ri = 0; + try { + File file = new File(Helper.OUTPUT_FOLDER_NAME + "/" + TESTNAME + "/FileRead_write.txt"); + File f = new File(Helper.MEMORY_MAPPED_FILE_NAME); + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + String pid = bean.getName(); + int val = Helper.init(TESTNAME, pid, logger); + if (0 != val) { + logger.log(Level.SEVERE, "Helper.init returned a non-zero code."); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Test init done; pid written to pid file; beginning with test"); + FileChannel channel = FileChannel.open(f.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + b = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + /* + * Mapped Byte Buffer should be in init state at the beginning of test + */ + if ('I' != b.getChar(Helper.MAPPED_INDEX)) { + logger.log(Level.SEVERE, "Error: Error in memory mapping, test is not in init state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + logger.log(Level.INFO, "Checking existence of file to be read and written to."); + if (file.exists()) { + file.delete(); + } + boolean newFile = file.createNewFile(); + if (!newFile) { + logger.log(Level.SEVERE, "Cannot create a new file to read and 
write to."); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + BufferedWriter brw = new BufferedWriter(new FileWriter(file)); + BufferedReader brr = new BufferedReader(new FileReader(file)); + + logger.log(Level.INFO, "Start writing the lines in file"); + + for (wi = 1; wi <= 5; wi++) { + brw.write(getLine(wi)); + } + + brw.flush(); + String s = "Line No: 0"; + int i; + + for (i = 0; i < 50; i++) { + brw.write(getLine(wi)); + brw.flush(); + wi++; + s = brr.readLine(); + ri = Integer.parseInt(s.replaceAll("[\\D]", "")); + } + + wi--; + logger.log(Level.INFO, "Going to checkpoint"); + + /* + * Checkpoint and wait for restore + */ + Helper.checkpointAndWait(b, logger); + logger.log(Level.INFO, "Test has been restored!"); + + brw.flush(); + + try { + s = brr.readLine(); + + } catch (Exception e) { + logger.log(Level.SEVERE, "Error: Buffered Reader is not reading file"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + if (null == s || s.isEmpty()) { + logger.log(Level.SEVERE, "Error: Error while reading lines after restore: Line read is null"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + int readLineNo = Integer.parseInt(s.replaceAll("[\\D]", "")); + if (ri + 1 != readLineNo) { + logger.log(Level.SEVERE, "Error: Not reading at correct line"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + String ch = brr.readLine(); + while (null != ch && !ch.isEmpty()) { + s = ch; + ch = brr.readLine(); + } + + readLineNo = Integer.parseInt(s.replaceAll("[\\D]", "")); + + if (readLineNo != wi) { + logger.log(Level.SEVERE, "Error: Data written has been lost"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + try { + brw.write(getLine(wi + 1)); + brw.flush(); + } catch (IOException e) { + logger.log(Level.SEVERE, "Error: cannot write file after restore"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + s = 
brr.readLine(); + readLineNo = Integer.parseInt(s.replaceAll("[\\D]", "")); + + if (readLineNo != wi + 1) { + logger.log(Level.SEVERE, "Error: Data not written correctly"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + logger.log(Level.INFO, "File is being read and written to correctly after restore!"); + logger.log(Level.INFO, Helper.PASS_MESSAGE); + brw.close(); + brr.close(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + System.exit(0); + } catch (Exception e) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + e.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occurred:" + e); + logger.log(Level.FINE, writer.toString()); + } + + if (null != b) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + System.exit(5); + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/Helper.java b/test/javaTests/src/org/criu/java/tests/Helper.java new file mode 100644 index 0000000000..d608fba47d --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/Helper.java @@ -0,0 +1,99 @@ +package org.criu.java.tests; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.nio.MappedByteBuffer; +import java.util.logging.FileHandler; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.logging.SimpleFormatter; + +class Helper { + static String MEMORY_MAPPED_FILE_NAME = "output/file"; + static String PASS_MESSAGE = "Test was a Success!!!"; + static String OUTPUT_FOLDER_NAME = "output"; + static String PACKAGE_NAME = "org.criu.java.tests"; + static String PID_APPEND = ".pid"; + static String SOURCE_FOLDER = "src/org/criu/java/tests"; + static String LOG_FOLDER = "CRlogs"; + static int MAPPED_REGION_SIZE = 100; + static int MAPPED_INDEX = 1; + static char STATE_RESTORE = 'R'; + static char STATE_CHECKPOINT = 'C'; + static char STATE_INIT = 
'I'; + static char STATE_TERMINATE = 'T'; + static char STATE_END = 'E'; + static char STATE_FAIL = 'F'; + static char STATE_PASS = 'P'; + + /** + * Create a new log file and pidfile and write + * the pid to the pidFile. + * + * @param testName Name of the java test + * @param pid Pid of the java test process + * @param logger + * @return 0 or 1 denoting whether the function was successful or not. + * @throws IOException + */ + static int init(String testName, String pid, Logger logger) throws IOException { + File pidfile = new File(OUTPUT_FOLDER_NAME + "/" + testName + "/" + testName + PID_APPEND); + + FileHandler handler = new FileHandler(Helper.OUTPUT_FOLDER_NAME + "/" + testName + "/" + testName + ".log", false); + handler.setFormatter(new SimpleFormatter()); + handler.setLevel(Level.FINE); + logger.addHandler(handler); + logger.setLevel(Level.FINE); + + /* + * Create a pid file and write the process's pid into it. + */ + if (pidfile.exists()) { + pidfile.delete(); + } + boolean newFile = pidfile.createNewFile(); + if (!newFile) { + logger.log(Level.SEVERE, "Cannot create new pid file."); + return 1; + } + BufferedWriter pidWriter = new BufferedWriter(new FileWriter(pidfile)); + pidWriter.write(pid + "\n"); + pidWriter.close(); + return 0; + } + + /** + * Put the Mapped Buffer to 'Ready to be checkpointed' state and wait for restore. + * + * @param b The MappedByteBuffer from the calling process. + * @param logger The Logger from the calling process. + */ + static void checkpointAndWait(MappedByteBuffer b, Logger logger) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_CHECKPOINT); + char c = b.getChar(Helper.MAPPED_INDEX); + /* + * Loop while MappedByteBuffer is in 'To be checkpointed' state + */ + while (Helper.STATE_CHECKPOINT == c) { + c = b.getChar(Helper.MAPPED_INDEX); + } + /* + * Test is in 'T' state if some error or exception occurs during checkpoint or restore. 
+ */ + if (Helper.STATE_TERMINATE == c) { + logger.log(Level.SEVERE, "Error during checkpoint-restore, Test terminated"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + /* + * The expected state of MappedByteBuffer is Helper.STATE_RESTORE-restored state. + */ + if (Helper.STATE_RESTORE != c) { + logger.log(Level.INFO, "Error: Test state is not the expected Restored state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/ImgFilter.java b/test/javaTests/src/org/criu/java/tests/ImgFilter.java new file mode 100644 index 0000000000..97087c2ccd --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/ImgFilter.java @@ -0,0 +1,11 @@ +package org.criu.java.tests; + +import java.io.File; +import java.io.FilenameFilter; + +class ImgFilter implements FilenameFilter { + @Override + public boolean accept(File dir, String fileName) { + return (fileName.endsWith(".img")); + } +} diff --git a/test/javaTests/test.xml b/test/javaTests/test.xml new file mode 100644 index 0000000000..8ff67c5e0d --- /dev/null +++ b/test/javaTests/test.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + From 72d44f81c9d85ed734cef962067a77a81d1adbc4 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 18:31:58 +0000 Subject: [PATCH 112/277] travis: Disallow failures on ia32 It seems pretty stable and hasn't add many false-positives during last months. While can reveal some issues for compatible C/R code. 
Signed-off-by: Dmitry Safonov --- .travis.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 4cde9c4fba..7a0c29a555 100644 --- a/.travis.yml +++ b/.travis.yml @@ -40,8 +40,6 @@ matrix: - env: TR_ARCH=fedora-rawhide-aarch64 - env: TR_ARCH=s390x - env: TR_ARCH=local GCOV=1 - - env: TR_ARCH=local COMPAT_TEST=y - - env: TR_ARCH=local CLANG=1 COMPAT_TEST=y script: - sudo make CCACHE=1 -C scripts/travis $TR_ARCH after_success: From d125c2b1ab550e309f4524828c14726e244c63f1 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Thu, 7 Feb 2019 15:17:48 +0300 Subject: [PATCH 113/277] mount: remove useless check in populate_mnt_ns The path: restore_root_task prepare_namespace_before_tasks mntns_maybe_create_roots is always called before the path below: restore_root_task fork_with_pid restore_task_with_children prepare_namespace prepare_mnt_ns populate_mnt_ns So (!!mnt_roots) == (root_ns_mask & CLONE_NEWNS) in populate_mnt_ns, but in prepare_mnt_ns we've already checked that it is true, so there is no need for this check - remove it. 
Signed-off-by: Pavel Tikhomirov --- criu/mount.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/criu/mount.c b/criu/mount.c index 486d017197..8022957780 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -3139,15 +3139,12 @@ static int populate_mnt_ns(void) struct ns_id *nsid; int ret; - if (mnt_roots) { - /* mnt_roots is a tmpfs mount and it's private */ - root_yard_mp = mnt_entry_alloc(); - if (!root_yard_mp) - return -1; + root_yard_mp = mnt_entry_alloc(); + if (!root_yard_mp) + return -1; - root_yard_mp->mountpoint = mnt_roots; - root_yard_mp->mounted = true; - } + root_yard_mp->mountpoint = mnt_roots; + root_yard_mp->mounted = true; pms = mnt_build_tree(mntinfo, root_yard_mp); if (!pms) From 53b22e4c20d1de728194be51041159fb8ac5eaf6 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Thu, 7 Feb 2019 15:17:49 +0300 Subject: [PATCH 114/277] ns: make rst_new_ns_id static It's never used outside of namespaces.c Signed-off-by: Pavel Tikhomirov --- criu/include/namespaces.h | 1 - criu/namespaces.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h index 287abb3c8a..a9a970a9be 100644 --- a/criu/include/namespaces.h +++ b/criu/include/namespaces.h @@ -166,7 +166,6 @@ extern int restore_ns(int rst, struct ns_desc *nd); extern int dump_task_ns_ids(struct pstree_item *); extern int predump_task_ns_ids(struct pstree_item *); -extern struct ns_id *rst_new_ns_id(unsigned int id, pid_t pid, struct ns_desc *nd, enum ns_type t); extern int rst_add_ns_id(unsigned int id, struct pstree_item *, struct ns_desc *nd); extern struct ns_id *lookup_ns_by_id(unsigned int id, struct ns_desc *nd); diff --git a/criu/namespaces.c b/criu/namespaces.c index a228737ee8..57f6bdfef4 100644 --- a/criu/namespaces.c +++ b/criu/namespaces.c @@ -290,7 +290,7 @@ static void nsid_add(struct ns_id *ns, struct ns_desc *nd, unsigned int id, pid_ pr_info("Add %s ns %d pid %d\n", nd->str, ns->id, 
ns->ns_pid); } -struct ns_id *rst_new_ns_id(unsigned int id, pid_t pid, +static struct ns_id *rst_new_ns_id(unsigned int id, pid_t pid, struct ns_desc *nd, enum ns_type type) { struct ns_id *nsid; From 743cb26e9caee2d6536f83fd62cffea05106ad37 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Thu, 7 Feb 2019 15:17:50 +0300 Subject: [PATCH 115/277] ns/restore/image: do not read namespace images for non-namespaced case Images for mount and net namespaces are empty if ns does not belong to us, thus we don't need to collect on restore. By adding these checks we will eliminate suspicious messages in logs about lack of images: ./test/zdtm.py run -k always -f h -t zdtm/static/env00 env00/54/2/restore.log:(00.000332) No mountpoints-5.img image env00/54/2/restore.log:(00.000342) No netns-2.img image Signed-off-by: Pavel Tikhomirov --- criu/mount.c | 5 +++++ criu/net.c | 3 +++ 2 files changed, 8 insertions(+) diff --git a/criu/mount.c b/criu/mount.c index 8022957780..fdaaa7b314 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -2989,6 +2989,11 @@ int read_mnt_ns_img(void) struct mount_info *pms = NULL; struct ns_id *nsid; + if (!(root_ns_mask & CLONE_NEWNS)) { + mntinfo = NULL; + return 0; + } + for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) { if (nsid->nd != &mnt_ns_desc) continue; diff --git a/criu/net.c b/criu/net.c index fe9b51addc..2285ae4032 100644 --- a/criu/net.c +++ b/criu/net.c @@ -2086,6 +2086,9 @@ int read_net_ns_img(void) { struct ns_id *ns; + if (!(root_ns_mask & CLONE_NEWNET)) + return 0; + for (ns = ns_ids; ns != NULL; ns = ns->next) { struct cr_img *img; int ret; From 74faa961a686fd6c494da2f4f206df2b797f5766 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Thu, 7 Feb 2019 15:17:51 +0300 Subject: [PATCH 116/277] mount: rework mount tree build step on restore Build each mntns mount tree alone just after reading mounts for it from image. 
This additional step before merging everything to a single mount tree allows us to have pointers to each mntns root mount at hand, also it allows us to remove extra complication from mnt_build_tree. Teach collect_mnt_from_image to return a tail pointer, so we can merge lists together later after building each tree. Add separate merge_mount_trees helper to create joint mount tree for all mntns'es and simplify mnt_build_ids_tree. I don't see any place where we use mntinfo_tree on restore, so save the real root of mntns mounts tree in it, instead of root_yard_mp, will need it in next patches for checking restore of these trees. v2: prepend children to the root_yard in merge_mount_trees so that the order in merged tree persists Signed-off-by: Pavel Tikhomirov --- criu/mount.c | 131 +++++++++++++++++++++++++-------------------------- 1 file changed, 65 insertions(+), 66 deletions(-) diff --git a/criu/mount.c b/criu/mount.c index fdaaa7b314..49708ffd5c 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -330,7 +330,7 @@ static bool mounts_equal(struct mount_info *a, struct mount_info *b) */ static char *mnt_roots; -static struct mount_info *mnt_build_ids_tree(struct mount_info *list, struct mount_info *yard_mount) +static struct mount_info *mnt_build_ids_tree(struct mount_info *list) { struct mount_info *m, *root = NULL; @@ -351,41 +351,14 @@ static struct mount_info *mnt_build_ids_tree(struct mount_info *list, struct mou if (!parent) { /* Only a root mount can be without parent */ - if (root == NULL && m->is_ns_root) { + if (!root && m->is_ns_root) { root = m; - if (!yard_mount) - continue; - } - - if (!root) { - pr_err("No parent found for mountpoint %d (@%s)\n", - m->mnt_id, m->mountpoint); - return NULL; - } - - pr_debug("Mountpoint %d (@%s) w/o parent %d\n", - m->mnt_id, m->mountpoint, m->parent_mnt_id); - - if (!mounts_sb_equal(root, m) || - strcmp(root->root, m->root)) { - pr_err("Nested mount namespaces with different " - "roots %d (@%s %s) %d (@%s %s) are not 
supported yet\n", - root->mnt_id, root->mountpoint, root->root, - m->mnt_id, m->mountpoint, m->root); - return NULL; - } - - /* Mount all namespace roots into the roots yard. */ - parent = yard_mount; - if (unlikely(!yard_mount)) { - pr_err("Nested mount %d (@%s %s) w/o root insertion detected\n", - m->mnt_id, m->mountpoint, m->root); - return NULL; + continue; } - pr_debug("Mountpoint %d (@%s) get parent %d (@%s)\n", - m->mnt_id, m->mountpoint, - parent->mnt_id, parent->mountpoint); + pr_err("No parent found for mountpoint %d (@%s)\n", + m->mnt_id, m->mountpoint); + return NULL; } m->parent = parent; @@ -397,9 +370,6 @@ static struct mount_info *mnt_build_ids_tree(struct mount_info *list, struct mou return NULL; } - if (yard_mount) - return yard_mount; - return root; } @@ -997,8 +967,7 @@ static int resolve_shared_mounts(struct mount_info *info, int root_master_id) return 0; } -static struct mount_info *mnt_build_tree(struct mount_info *list, - struct mount_info *root_mp) +static struct mount_info *mnt_build_tree(struct mount_info *list) { struct mount_info *tree; @@ -1007,7 +976,7 @@ static struct mount_info *mnt_build_tree(struct mount_info *list, */ pr_info("Building mountpoints tree\n"); - tree = mnt_build_ids_tree(list, root_mp); + tree = mnt_build_ids_tree(list); if (!tree) return NULL; @@ -1690,7 +1659,7 @@ struct mount_info *collect_mntinfo(struct ns_id *ns, bool for_dump) return NULL; } - ns->mnt.mntinfo_tree = mnt_build_tree(pm, NULL); + ns->mnt.mntinfo_tree = mnt_build_tree(pm); if (ns->mnt.mntinfo_tree == NULL) goto err; @@ -2881,7 +2850,7 @@ static int get_mp_mountpoint(char *mountpoint, struct mount_info *mi, char *root return 0; } -static int collect_mnt_from_image(struct mount_info **pms, struct ns_id *nsid) +static int collect_mnt_from_image(struct mount_info **head, struct mount_info **tail, struct ns_id *nsid) { MntEntry *me = NULL; int ret, root_len = 1; @@ -2909,8 +2878,10 @@ static int collect_mnt_from_image(struct mount_info **pms, struct 
ns_id *nsid) goto err; pm->nsid = nsid; - pm->next = *pms; - *pms = pm; + pm->next = *head; + *head = pm; + if (!*tail) + *tail = pm; pm->mnt_id = me->mnt_id; pm->parent_mnt_id = me->parent_mnt_id; @@ -2995,11 +2966,20 @@ int read_mnt_ns_img(void) } for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) { + struct mount_info *head = NULL, *tail = NULL; + if (nsid->nd != &mnt_ns_desc) continue; - if (collect_mnt_from_image(&pms, nsid)) + if (collect_mnt_from_image(&head, &tail, nsid)) + return -1; + + nsid->mnt.mntinfo_tree = mnt_build_tree(head); + if (!nsid->mnt.mntinfo_tree) return -1; + + tail->next = pms; + pms = head; } mntinfo = pms; @@ -3101,6 +3081,40 @@ void fini_restore_mntns(void) } } +static int merge_mount_trees(struct mount_info *root_yard) +{ + struct mount_info *first = NULL; + struct ns_id *nsid; + + /* Merge mount trees together under root_yard */ + for (nsid = ns_ids; nsid; nsid = nsid->next) { + struct mount_info *root; + + if (nsid->nd != &mnt_ns_desc) + continue; + + root = nsid->mnt.mntinfo_tree; + + if (!first) + first = root; + else if (!mounts_sb_equal(root, first) || + strcmp(root->root, first->root)) { + pr_err("Nested mount namespaces with different " + "roots %d (@%s %s) %d (@%s %s) are not supported yet\n", + root->mnt_id, root->mountpoint, root->root, + first->mnt_id, first->mountpoint, first->root); + return -1; + } + + pr_debug("Mountpoint %d (@%s) moved to the root yard\n", + root->mnt_id, root->mountpoint); + root->parent = root_yard; + list_add(&root->siblings, &root_yard->children); + } + + return 0; +} + /* * All nested mount namespaces are restore as sub-trees of the root namespace. 
*/ @@ -3140,8 +3154,6 @@ static int populate_roots_yard(void) static int populate_mnt_ns(void) { - struct mount_info *pms; - struct ns_id *nsid; int ret; root_yard_mp = mnt_entry_alloc(); @@ -3151,40 +3163,27 @@ static int populate_mnt_ns(void) root_yard_mp->mountpoint = mnt_roots; root_yard_mp->mounted = true; - pms = mnt_build_tree(mntinfo, root_yard_mp); - if (!pms) + if (merge_mount_trees(root_yard_mp)) return -1; #ifdef CONFIG_BINFMT_MISC_VIRTUALIZED if (!opts.has_binfmt_misc && !list_empty(&binfmt_misc_list)) { /* Add to mount tree. Generic code will mount it later */ - ret = add_cr_time_mount(pms, "binfmt_misc", BINFMT_MISC_HOME, 0); + ret = add_cr_time_mount(root_yard_mp, "binfmt_misc", BINFMT_MISC_HOME, 0); if (ret) return -1; } #endif - if (resolve_shared_mounts(mntinfo, pms->master_id)) + if (resolve_shared_mounts(mntinfo, 0)) return -1; - for (nsid = ns_ids; nsid; nsid = nsid->next) { - if (nsid->nd != &mnt_ns_desc) - continue; - - /* - * Make trees of all namespaces look the - * same, so that manual paths resolution - * works on them. - */ - nsid->mnt.mntinfo_tree = pms; - } - if (validate_mounts(mntinfo, false)) return -1; - mnt_tree_for_each(pms, set_is_overmounted); + mnt_tree_for_each(root_yard_mp, set_is_overmounted); - if (find_remap_mounts(pms)) + if (find_remap_mounts(root_yard_mp)) return -1; if (populate_roots_yard()) @@ -3193,8 +3192,8 @@ static int populate_mnt_ns(void) if (mount_clean_path()) return -1; - ret = mnt_tree_for_each(pms, do_mount_one); - mnt_tree_for_each(pms, do_close_one); + ret = mnt_tree_for_each(root_yard_mp, do_mount_one); + mnt_tree_for_each(root_yard_mp, do_close_one); if (ret == 0 && fixup_remap_mounts()) return -1; From 8529a48a1f5f47d9f8715d2d12de3224a325b7e3 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Thu, 27 Jun 2019 12:43:40 +0300 Subject: [PATCH 117/277] mount: make mnt_resort_siblings nonrecursive and reuse friendly Add mnt_subtree_next DFS-next search to remove recursion. 
v5: add this patch, remove recursion from sorting helpers v6: rip out beautiful yet unused step-part of DFS-next algorithm Signed-off-by: Pavel Tikhomirov --- criu/mount.c | 57 +++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 48 insertions(+), 9 deletions(-) diff --git a/criu/mount.c b/criu/mount.c index 49708ffd5c..974af6eb22 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -385,13 +385,12 @@ static unsigned int mnt_depth(struct mount_info *m) return depth; } -static void mnt_resort_siblings(struct mount_info *tree) +static void __mnt_resort_children(struct mount_info *parent) { - struct mount_info *m, *p; LIST_HEAD(list); /* - * Put siblings of each node in an order they can be (u)mounted + * Put children mounts in an order they can be (u)mounted * I.e. if we have mounts on foo/bar/, foo/bar/foobar/ and foo/ * we should put them in the foo/bar/foobar/, foo/bar/, foo/ order. * Otherwise we will not be able to (u)mount them in a sequence. @@ -403,11 +402,12 @@ static void mnt_resort_siblings(struct mount_info *tree) * to contain hundreds (or more) elements.
*/ - pr_info("\tResorting siblings on %d\n", tree->mnt_id); - while (!list_empty(&tree->children)) { + pr_info("\tResorting children of %d in mount order\n", parent->mnt_id); + while (!list_empty(&parent->children)) { + struct mount_info *m, *p; unsigned int depth; - m = list_first_entry(&tree->children, struct mount_info, siblings); + m = list_first_entry(&parent->children, struct mount_info, siblings); list_del(&m->siblings); depth = mnt_depth(m); @@ -416,10 +416,31 @@ static void mnt_resort_siblings(struct mount_info *tree) break; list_add_tail(&m->siblings, &p->siblings); - mnt_resort_siblings(m); } - list_splice(&list, &tree->children); + list_splice(&list, &parent->children); +} + +static struct mount_info *mnt_subtree_next(struct mount_info *mi, + struct mount_info *root); + +static void resort_siblings(struct mount_info *root, + void (*resort_children)(struct mount_info *)) { + struct mount_info *mi = root; + while (1) { + /* + * Explanation: sorting the children of the tree like these is + * safe and does not break the tree search in mnt_subtree_next + * (DFS-next search), as we sort children before calling next + * on parent and thus before DFS-next ever touches them, so + * from the perspective of DFS-next all children look like they + * are already sorted. 
+ */ + resort_children(mi); + mi = mnt_subtree_next(mi, root); + if (!mi) + break; + } } static void mnt_tree_show(struct mount_info *tree, int off) @@ -980,7 +1001,7 @@ static struct mount_info *mnt_build_tree(struct mount_info *list) if (!tree) return NULL; - mnt_resort_siblings(tree); + resort_siblings(tree, __mnt_resort_children); pr_info("Done:\n"); mnt_tree_show(tree, 0); return tree; @@ -3821,3 +3842,21 @@ int remount_readonly_mounts(void) */ return call_helper_process(ns_remount_readonly_mounts, NULL); } + +static struct mount_info *mnt_subtree_next(struct mount_info *mi, + struct mount_info *root) +{ + if (!list_empty(&mi->children)) + return list_entry(mi->children.next, + struct mount_info, siblings); + + while (mi->parent && mi != root) { + if (mi->siblings.next == &mi->parent->children) + mi = mi->parent; + else + return list_entry(mi->siblings.next, + struct mount_info, siblings); + } + + return NULL; +} From 7cc9a726d135e6ec734188b3a5a2d4aab6a85ea1 Mon Sep 17 00:00:00 2001 From: Nidhi Gupta Date: Tue, 5 Nov 2019 15:19:25 +0530 Subject: [PATCH 118/277] Run java functional tests on travis Signed-off-by: Nidhi Gupta --- .travis.yml | 2 ++ scripts/build/Dockerfile.openj9-alpine | 33 ++++++++++++++++++++++++++ scripts/build/Dockerfile.openj9-ubuntu | 30 +++++++++++++++++++++++ scripts/travis/Makefile | 3 +++ scripts/travis/openj9-test.sh | 22 +++++++++++++++++ 5 files changed, 90 insertions(+) create mode 100644 scripts/build/Dockerfile.openj9-alpine create mode 100644 scripts/build/Dockerfile.openj9-ubuntu create mode 100755 scripts/travis/openj9-test.sh diff --git a/.travis.yml b/.travis.yml index 7a0c29a555..6e854540b0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -24,6 +24,8 @@ env: - TR_ARCH=fedora-rawhide-aarch64 - TR_ARCH=centos - TR_ARCH=podman-test + - TR_ARCH=openj9-test + matrix: include: - os: linux diff --git a/scripts/build/Dockerfile.openj9-alpine b/scripts/build/Dockerfile.openj9-alpine new file mode 100644 index 0000000000..654e7bf317 --- 
/dev/null +++ b/scripts/build/Dockerfile.openj9-alpine @@ -0,0 +1,33 @@ +FROM adoptopenjdk/openjdk8-openj9:alpine + +RUN apk update && apk add \ + bash \ + build-base \ + ccache \ + coreutils \ + git \ + gnutls-dev \ + libaio-dev \ + libcap-dev \ + libnet-dev \ + libnl3-dev \ + pkgconfig \ + protobuf-c-dev \ + protobuf-dev \ + python \ + sudo \ + maven \ + py-yaml \ + py-pip \ + py2-future \ + ip6tables \ + iptables \ + bash + +COPY . /criu +WORKDIR /criu + +RUN make + +ENTRYPOINT mvn -f test/javaTests/pom.xml test + diff --git a/scripts/build/Dockerfile.openj9-ubuntu b/scripts/build/Dockerfile.openj9-ubuntu new file mode 100644 index 0000000000..13d9080ff2 --- /dev/null +++ b/scripts/build/Dockerfile.openj9-ubuntu @@ -0,0 +1,30 @@ +FROM adoptopenjdk/openjdk8-openj9:latest + +RUN apt-get update && apt-get install -y --no-install-recommends protobuf-c-compiler \ + libprotobuf-c-dev \ + libaio-dev \ + python-future \ + libprotobuf-dev \ + protobuf-compiler \ + libcap-dev \ + libnl-3-dev \ + gdb \ + bash \ + python-protobuf \ + python-yaml \ + libnet-dev \ + libnl-route-3-dev \ + libbsd-dev \ + make \ + git \ + pkg-config \ + gcc \ + maven + +COPY . /criu +WORKDIR /criu + +RUN make + +ENTRYPOINT mvn -f test/javaTests/pom.xml test + diff --git a/scripts/travis/Makefile b/scripts/travis/Makefile index baddd6eb10..c6b67935b2 100644 --- a/scripts/travis/Makefile +++ b/scripts/travis/Makefile @@ -41,5 +41,8 @@ docker-test: podman-test: ./podman-test.sh +openj9-test: + ./openj9-test.sh + %: $(MAKE) -C ../build $@$(target-suffix) diff --git a/scripts/travis/openj9-test.sh b/scripts/travis/openj9-test.sh new file mode 100755 index 0000000000..968f064f85 --- /dev/null +++ b/scripts/travis/openj9-test.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +cd ../.. + +failures="" + +docker build -t criu-openj9-ubuntu-test:latest -f scripts/build/Dockerfile.openj9-ubuntu . +docker run --rm --privileged criu-openj9-ubuntu-test:latest +if [ $? 
-ne 0 ]; then + failures=`echo "$failures ubuntu"` +fi + +docker build -t criu-openj9-alpine-test:latest -f scripts/build/Dockerfile.openj9-alpine . +docker run --rm --privileged criu-openj9-alpine-test:latest +if [ $? -ne 0 ]; then + failures=`echo "$failures alpine"` +fi + +if [ -n "$failures" ]; then + echo "Tests failed on $failures" + exit 1 +fi From 7cdca4c2f547ea93cd4de9b93d1bf2989825b197 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Mon, 4 Nov 2019 08:52:55 +0100 Subject: [PATCH 119/277] travis: fix copy paste error from previous commit In my previous commit I copied a line with a return into the main script body. bash can only return from functions. This changes return to exit. Signed-off-by: Adrian Reber --- scripts/travis/travis-tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/travis/travis-tests b/scripts/travis/travis-tests index 1f6b19130b..07311511c6 100755 --- a/scripts/travis/travis-tests +++ b/scripts/travis/travis-tests @@ -86,7 +86,7 @@ if [ "$WIDTH" -gt 80 ]; then exit 1 fi -[ -n "$SKIP_TRAVIS_TEST" ] && return +[ -n "$SKIP_TRAVIS_TEST" ] && exit 0 ulimit -c unlimited From e98fe212accfda1a2da80112750c2045d66bd61b Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Mon, 4 Nov 2019 08:54:22 +0100 Subject: [PATCH 120/277] travis: Do not run privileged containers in LXD Travis uses unprivileged containers for aarch64 in LXD. Docker with '--privileged' fails in such situation. This changes the travis setup to only start docker with '--privileged' if running on x86_64. 
Signed-off-by: Adrian Reber --- scripts/travis/Makefile | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/scripts/travis/Makefile b/scripts/travis/Makefile index c6b67935b2..80c7b9230a 100644 --- a/scripts/travis/Makefile +++ b/scripts/travis/Makefile @@ -13,6 +13,9 @@ endif TARGETS := alpine fedora-rawhide centos ZDTM_OPTIONS := +UNAME := $(shell uname -m) + +export UNAME alpine: ZDTM_OPTIONS=-x zdtm/static/binfmt_misc -x zdtm/static/netns-nf -x zdtm/static/sched_policy00 -x zdtm/static/seccomp_strict -x zdtm/static/sigaltstack -x zdtm/static/signalfd00 -x zdtm/static/config_inotify_irmap @@ -23,17 +26,29 @@ define DOCKER_JSON endef export DOCKER_JSON -$(TARGETS): - echo "$$DOCKER_JSON" > /etc/docker/daemon.json - systemctl restart docker + +ifeq ($(UNAME),x86_64) + CONTAINER_OPTS := --rm -it --privileged -v /lib/modules:/lib/modules --tmpfs /run +else + CONTAINER_OPTS := --rm -v /lib/modules:/lib/modules --tmpfs /run +endif + +restart-docker: + if [ "$$UNAME" = "x86_64" ]; then \ + echo "$$DOCKER_JSON" > /etc/docker/daemon.json; \ + cat /etc/docker/daemon.json; \ + systemctl status docker; \ + systemctl restart docker; \ + systemctl status docker; \ + fi + +$(TARGETS): restart-docker $(MAKE) -C ../build $@$(target-suffix) - docker run --env-file docker.env --rm -it --privileged -v /lib/modules:/lib/modules --tmpfs /run criu-$@ scripts/travis/travis-tests + docker run --env-file docker.env $(CONTAINER_OPTS) criu-$@ scripts/travis/travis-tests -fedora-asan: - echo "$$DOCKER_JSON" > /etc/docker/daemon.json - systemctl restart docker +fedora-asan: restart-docker $(MAKE) -C ../build $@$(target-suffix) - docker run --rm -it --privileged -v /lib/modules:/lib/modules --tmpfs /run criu-$@ ./scripts/travis/asan.sh $(ZDTM_OPTIONS) + docker run -it $(CONTAINER_OPTS) criu-$@ ./scripts/travis/asan.sh $(ZDTM_OPTIONS) docker-test: ./docker-test.sh From f7c5bc8cee433f6b961c13b629cb688f831367b8 Mon Sep 17 00:00:00 2001 From: Adrian 
Reber Date: Mon, 4 Nov 2019 08:56:15 +0100 Subject: [PATCH 121/277] Makefile hack for travis aarch64/armv8l For CRIU's compile only tests for armv7hf on Travis we are using 'setarch linux32' which returns armv8l on Travis aarch64. This adds a path in the Makefile to treat armv8l just as armv7hf during compile. This enables us to run armv7hf compile tests on Travis aarch64 hardware. Much faster. Maybe not entirely correct, but probably good enough for compile testing in an armv7hf container. Signed-off-by: Adrian Reber --- Makefile | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0140330e14..f827e7baa7 100644 --- a/Makefile +++ b/Makefile @@ -35,7 +35,6 @@ endif # Architecture specific options. ifeq ($(ARCH),arm) ARMV := $(shell echo $(UNAME-M) | sed -nr 's/armv([[:digit:]]).*/\1/p; t; i7') - DEFINES := -DCONFIG_ARMV$(ARMV) -DCONFIG_VDSO_32 ifeq ($(ARMV),6) USERCFLAGS += -march=armv6 @@ -45,6 +44,16 @@ ifeq ($(ARCH),arm) USERCFLAGS += -march=armv7-a endif + ifeq ($(ARMV),8) + # Running 'setarch linux32 uname -m' returns armv8l on travis aarch64. + # This tells CRIU to handle armv8l just as armv7hf. Right now this is + # only used for compile testing. No further verification of armv8l exists. + USERCFLAGS += -march=armv7-a + ARMV := 7 + endif + + DEFINES := -DCONFIG_ARMV$(ARMV) -DCONFIG_VDSO_32 + PROTOUFIX := y # For simplicity - compile code in Arm mode without interwork. # We could choose Thumb mode as default instead - but a dirty From 6f1fb1b2361d5b8e5bdce5fe02282c9ea78044f8 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Mon, 4 Nov 2019 08:58:54 +0100 Subject: [PATCH 122/277] travis: switch all arm related tests to real hardware This switches all arm related tests (32bit and 64bit) to the aarch64 systems Travis provides. For arm32 we are running in a armv7hf container on aarch64 with 'setarch linux32'. 
The main changes are that docker on Travis aarch64 cannot use '--privileged' as Travis is using unprivileged LXD containers to setup the testing environment. Signed-off-by: Adrian Reber --- .travis.yml | 50 +++++++++++++++---- scripts/build/Dockerfile.armv7hf.hdr | 4 +- scripts/build/Dockerfile.armv7hf.tmpl | 2 +- .../Dockerfile.fedora-rawhide-aarch64.hdr | 3 -- .../Dockerfile.fedora-rawhide-aarch64.tmpl | 1 - scripts/build/Dockerfile.linux32.tmpl | 47 +++++++++++++++++ scripts/build/Makefile | 4 +- scripts/travis/Makefile | 2 + scripts/travis/podman-test.sh | 2 +- 9 files changed, 93 insertions(+), 22 deletions(-) delete mode 100644 scripts/build/Dockerfile.fedora-rawhide-aarch64.hdr delete mode 120000 scripts/build/Dockerfile.fedora-rawhide-aarch64.tmpl create mode 100644 scripts/build/Dockerfile.linux32.tmpl diff --git a/.travis.yml b/.travis.yml index 6e854540b0..85b6b6e07a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,6 @@ language: c sudo: required -dist: xenial +dist: bionic cache: ccache services: - docker @@ -9,21 +9,12 @@ env: - TR_ARCH=local CLANG=1 - TR_ARCH=local COMPAT_TEST=y - TR_ARCH=local CLANG=1 COMPAT_TEST=y - - TR_ARCH=alpine - - TR_ARCH=fedora-asan - TR_ARCH=x86_64 - TR_ARCH=x86_64 CLANG=1 - - TR_ARCH=armv7hf - TR_ARCH=ppc64le - TR_ARCH=s390x - - TR_ARCH=armv7hf CLANG=1 - TR_ARCH=ppc64le CLANG=1 - - TR_ARCH=alpine CLANG=1 - TR_ARCH=docker-test - - TR_ARCH=fedora-rawhide - - TR_ARCH=fedora-rawhide-aarch64 - - TR_ARCH=centos - - TR_ARCH=podman-test - TR_ARCH=openj9-test matrix: @@ -36,10 +27,47 @@ matrix: arch: arm64 env: TR_ARCH=local CLANG=1 dist: bionic + - os: linux + arch: arm64 + # This runs on aarch64 with 'setarch linux32' + env: TR_ARCH=armv7hf + dist: bionic + - os: linux + arch: arm64 + # This runs on aarch64 with 'setarch linux32' + env: TR_ARCH=armv7hf CLANG=1 + dist: bionic + - os: linux + arch: arm64 + env: TR_ARCH=fedora-rawhide + dist: bionic + - os: linux + arch: amd64 + env: TR_ARCH=fedora-rawhide + dist: xenial # test 
hangs on bionic + - os: linux + arch: amd64 + env: TR_ARCH=podman-test + dist: bionic + - os: linux + arch: amd64 + env: TR_ARCH=alpine CLANG=1 + dist: xenial # test hangs on bionic + - os: linux + arch: amd64 + env: TR_ARCH=alpine + dist: xenial # test hangs on bionic + - os: linux + arch: amd64 + env: TR_ARCH=centos + dist: xenial # test hangs on bionic + - os: linux + arch: amd64 + env: TR_ARCH=fedora-asan + dist: xenial # test hangs on bionic allow_failures: - env: TR_ARCH=docker-test - env: TR_ARCH=fedora-rawhide - - env: TR_ARCH=fedora-rawhide-aarch64 - env: TR_ARCH=s390x - env: TR_ARCH=local GCOV=1 script: diff --git a/scripts/build/Dockerfile.armv7hf.hdr b/scripts/build/Dockerfile.armv7hf.hdr index d453d6df70..7c66474e56 100644 --- a/scripts/build/Dockerfile.armv7hf.hdr +++ b/scripts/build/Dockerfile.armv7hf.hdr @@ -1,3 +1 @@ -FROM arm32v7/ubuntu:xenial - -COPY scripts/build/qemu-user-static/usr/bin/qemu-arm-static /usr/bin/qemu-arm-static +FROM arm32v7/ubuntu:bionic diff --git a/scripts/build/Dockerfile.armv7hf.tmpl b/scripts/build/Dockerfile.armv7hf.tmpl index cb804790e6..7bc6d9cde9 120000 --- a/scripts/build/Dockerfile.armv7hf.tmpl +++ b/scripts/build/Dockerfile.armv7hf.tmpl @@ -1 +1 @@ -Dockerfile.tmpl \ No newline at end of file +Dockerfile.linux32.tmpl \ No newline at end of file diff --git a/scripts/build/Dockerfile.fedora-rawhide-aarch64.hdr b/scripts/build/Dockerfile.fedora-rawhide-aarch64.hdr deleted file mode 100644 index 82f29e3365..0000000000 --- a/scripts/build/Dockerfile.fedora-rawhide-aarch64.hdr +++ /dev/null @@ -1,3 +0,0 @@ -FROM arm64v8/fedora:rawhide - -COPY scripts/build/qemu-user-static/usr/bin/qemu-aarch64-static /usr/bin/qemu-aarch64-static diff --git a/scripts/build/Dockerfile.fedora-rawhide-aarch64.tmpl b/scripts/build/Dockerfile.fedora-rawhide-aarch64.tmpl deleted file mode 120000 index e4c40309c5..0000000000 --- a/scripts/build/Dockerfile.fedora-rawhide-aarch64.tmpl +++ /dev/null @@ -1 +0,0 @@ -Dockerfile.fedora.tmpl \ No newline 
at end of file diff --git a/scripts/build/Dockerfile.linux32.tmpl b/scripts/build/Dockerfile.linux32.tmpl new file mode 100644 index 0000000000..5d3fe5139f --- /dev/null +++ b/scripts/build/Dockerfile.linux32.tmpl @@ -0,0 +1,47 @@ +ARG CC=gcc +ARG ENV1=FOOBAR + +RUN apt-get update && apt-get install -y \ + ccache \ + libnet-dev \ + libnl-route-3-dev \ + $CC \ + bsdmainutils \ + build-essential \ + git-core \ + iptables \ + libaio-dev \ + libcap-dev \ + libgnutls28-dev \ + libgnutls30 \ + libnl-3-dev \ + libprotobuf-c-dev \ + libprotobuf-dev \ + libselinux-dev \ + pkg-config \ + protobuf-c-compiler \ + protobuf-compiler \ + python-minimal \ + python-future + +COPY . /criu +WORKDIR /criu +ENV CC="ccache $CC" CCACHE_DIR=/tmp/.ccache CCACHE_NOCOMPRESS=1 $ENV1=yes + +RUN uname -m && setarch linux32 uname -m && setarch --list + +RUN mv .ccache /tmp && make mrproper && ccache -s && \ + date && \ +# Check single object build + setarch linux32 make -j $(nproc) CC="$CC" criu/parasite-syscall.o && \ +# Compile criu + setarch linux32 make -j $(nproc) CC="$CC" && \ + date && \ +# Check that "make mrproper" works + setarch linux32 make mrproper && ! git clean -ndx --exclude=scripts/build \ + --exclude=.config --exclude=test | grep . 
+ +# Compile tests +RUN date && setarch linux32 make -j $(nproc) CC="$CC" -C test/zdtm && date + +#RUN make test/compel/handle_binary && ./test/compel/handle_binary diff --git a/scripts/build/Makefile b/scripts/build/Makefile index 3d4d91cd57..d7ad82aec2 100644 --- a/scripts/build/Makefile +++ b/scripts/build/Makefile @@ -1,5 +1,5 @@ -QEMU_ARCHES := armv7hf ppc64le s390x fedora-rawhide-aarch64 # require qemu -ARCHES := $(QEMU_ARCHES) aarch64 x86_64 fedora-asan fedora-rawhide centos +QEMU_ARCHES := ppc64le s390x # require qemu +ARCHES := $(QEMU_ARCHES) aarch64 x86_64 fedora-asan fedora-rawhide centos armv7hf TARGETS := $(ARCHES) alpine TARGETS_CLANG := $(addsuffix $(TARGETS),-clang) CONTAINER_RUNTIME := docker diff --git a/scripts/travis/Makefile b/scripts/travis/Makefile index 80c7b9230a..3731711490 100644 --- a/scripts/travis/Makefile +++ b/scripts/travis/Makefile @@ -28,6 +28,8 @@ endef export DOCKER_JSON ifeq ($(UNAME),x86_64) + # On anything besides x86_64 Travis is running unprivileged LXD + # containers which do not support running docker with '--privileged'. CONTAINER_OPTS := --rm -it --privileged -v /lib/modules:/lib/modules --tmpfs /run else CONTAINER_OPTS := --rm -v /lib/modules:/lib/modules --tmpfs /run diff --git a/scripts/travis/podman-test.sh b/scripts/travis/podman-test.sh index 9bd1f3d8bc..eafdc73bee 100755 --- a/scripts/travis/podman-test.sh +++ b/scripts/travis/podman-test.sh @@ -11,7 +11,7 @@ apt-get install -qq \ apt-get update -qq -apt-get install -qqy podman +apt-get install -qqy podman containernetworking-plugins export SKIP_TRAVIS_TEST=1 From 524263e515084cd49849a3b7e0062b32b51f28c1 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 13 Nov 2019 08:38:16 +0100 Subject: [PATCH 123/277] travis: switch ppc64le and s390x to real hardware Now that Travis also supports ppc64le and s390x we can remove all qemu based docker emulation from our test setup. This now runs ppc64le and s390x tests on real hardware (LXD containers).
Signed-off-by: Adrian Reber --- .travis.yml | 17 +++++++++---- scripts/build/Dockerfile.aarch64.hdr | 3 --- scripts/build/Dockerfile.aarch64.tmpl | 1 - scripts/build/Dockerfile.ppc64le.hdr | 5 ---- scripts/build/Dockerfile.ppc64le.tmpl | 1 - scripts/build/Dockerfile.s390x.hdr | 6 ----- scripts/build/Dockerfile.s390x.tmpl | 1 - scripts/build/Makefile | 21 +--------------- scripts/build/binfmt_misc | 13 ---------- scripts/build/extract-deb-pkg | 36 --------------------------- scripts/travis/travis-tests | 7 ------ 11 files changed, 13 insertions(+), 98 deletions(-) delete mode 100644 scripts/build/Dockerfile.aarch64.hdr delete mode 120000 scripts/build/Dockerfile.aarch64.tmpl delete mode 100644 scripts/build/Dockerfile.ppc64le.hdr delete mode 120000 scripts/build/Dockerfile.ppc64le.tmpl delete mode 100644 scripts/build/Dockerfile.s390x.hdr delete mode 120000 scripts/build/Dockerfile.s390x.tmpl delete mode 100755 scripts/build/binfmt_misc delete mode 100755 scripts/build/extract-deb-pkg diff --git a/.travis.yml b/.travis.yml index 85b6b6e07a..3c760d08aa 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,14 +11,22 @@ env: - TR_ARCH=local CLANG=1 COMPAT_TEST=y - TR_ARCH=x86_64 - TR_ARCH=x86_64 CLANG=1 - - TR_ARCH=ppc64le - - TR_ARCH=s390x - - TR_ARCH=ppc64le CLANG=1 - TR_ARCH=docker-test - TR_ARCH=openj9-test - matrix: include: + - os: linux + arch: ppc64le + env: TR_ARCH=local + dist: bionic + - os: linux + arch: ppc64le + env: TR_ARCH=local CLANG=1 + dist: bionic + - os: linux + arch: s390x + env: TR_ARCH=local + dist: bionic - os: linux arch: arm64 env: TR_ARCH=local @@ -68,7 +76,6 @@ matrix: allow_failures: - env: TR_ARCH=docker-test - env: TR_ARCH=fedora-rawhide - - env: TR_ARCH=s390x - env: TR_ARCH=local GCOV=1 script: - sudo make CCACHE=1 -C scripts/travis $TR_ARCH diff --git a/scripts/build/Dockerfile.aarch64.hdr b/scripts/build/Dockerfile.aarch64.hdr deleted file mode 100644 index c90c980886..0000000000 --- a/scripts/build/Dockerfile.aarch64.hdr +++ /dev/null @@ 
-1,3 +0,0 @@ -FROM arm64v8/ubuntu:xenial - -COPY scripts/build/qemu-user-static/usr/bin/qemu-aarch64-static /usr/bin/qemu-aarch64-static diff --git a/scripts/build/Dockerfile.aarch64.tmpl b/scripts/build/Dockerfile.aarch64.tmpl deleted file mode 120000 index cb804790e6..0000000000 --- a/scripts/build/Dockerfile.aarch64.tmpl +++ /dev/null @@ -1 +0,0 @@ -Dockerfile.tmpl \ No newline at end of file diff --git a/scripts/build/Dockerfile.ppc64le.hdr b/scripts/build/Dockerfile.ppc64le.hdr deleted file mode 100644 index ba65901c2b..0000000000 --- a/scripts/build/Dockerfile.ppc64le.hdr +++ /dev/null @@ -1,5 +0,0 @@ -FROM ppc64le/ubuntu:xenial - -ENV QEMU_CPU POWER8 -COPY scripts/build/qemu-user-static/usr/bin/qemu-ppc64le-static /usr/bin/qemu-ppc64le-static -RUN sed -i '/security/ d' /etc/apt/sources.list diff --git a/scripts/build/Dockerfile.ppc64le.tmpl b/scripts/build/Dockerfile.ppc64le.tmpl deleted file mode 120000 index cb804790e6..0000000000 --- a/scripts/build/Dockerfile.ppc64le.tmpl +++ /dev/null @@ -1 +0,0 @@ -Dockerfile.tmpl \ No newline at end of file diff --git a/scripts/build/Dockerfile.s390x.hdr b/scripts/build/Dockerfile.s390x.hdr deleted file mode 100644 index e02097f625..0000000000 --- a/scripts/build/Dockerfile.s390x.hdr +++ /dev/null @@ -1,6 +0,0 @@ -FROM s390x/debian:latest - -ENV QEMU_CPU z900 -COPY scripts/build/qemu-user-static/usr/bin/qemu-s390x-static /usr/bin/qemu-s390x-static -# The security repository does not seem to exist anymore -RUN sed -i '/security/ d' /etc/apt/sources.list diff --git a/scripts/build/Dockerfile.s390x.tmpl b/scripts/build/Dockerfile.s390x.tmpl deleted file mode 120000 index cb804790e6..0000000000 --- a/scripts/build/Dockerfile.s390x.tmpl +++ /dev/null @@ -1 +0,0 @@ -Dockerfile.tmpl \ No newline at end of file diff --git a/scripts/build/Makefile b/scripts/build/Makefile index d7ad82aec2..a7c78e8bd4 100644 --- a/scripts/build/Makefile +++ b/scripts/build/Makefile @@ -1,5 +1,4 @@ -QEMU_ARCHES := ppc64le s390x # require qemu 
-ARCHES := $(QEMU_ARCHES) aarch64 x86_64 fedora-asan fedora-rawhide centos armv7hf +ARCHES := x86_64 fedora-asan fedora-rawhide centos armv7hf TARGETS := $(ARCHES) alpine TARGETS_CLANG := $(addsuffix $(TARGETS),-clang) CONTAINER_RUNTIME := docker @@ -16,15 +15,6 @@ $(foreach arch,$(ARCHES),$(eval $(call ARCH_DEP,$(arch)))) Dockerfile.%: Dockerfile.%.hdr Dockerfile.%.tmpl cat $^ > $@ -qemu-user-static: - ./extract-deb-pkg qemu-user-static - -binfmt_misc: - ./binfmt_misc -.PHONY: binfmt_misc - -$(QEMU_ARCHES): qemu-user-static binfmt_misc - $(TARGETS): mkdir -p $(HOME)/.ccache mv $(HOME)/.ccache ../../ @@ -42,12 +32,3 @@ $(foreach t,$(TARGETS),$(eval $(call CLANG_DEP,$(t)))) %-clang: DB_ENV=--build-arg ENV1=CCACHE_CPP2 s390x-clang: DB_CC=--build-arg CC=clang-3.8 .PHONY: $(TARGETS_CLANG) - -clean: - rm -rf qemu-user-static - for ARCH in $(ARCHES); do \ - FILE=/proc/sys/fs/binfmt_misc/$$ARCH; \ - test -f $$FILE && echo -1 > $$FILE; \ - rm -f Dockerfile.$$ARCH; \ - done -.PHONY: clean diff --git a/scripts/build/binfmt_misc b/scripts/build/binfmt_misc deleted file mode 100755 index bf2a2ecad6..0000000000 --- a/scripts/build/binfmt_misc +++ /dev/null @@ -1,13 +0,0 @@ -set -e -x - -test -f /proc/sys/fs/binfmt_misc/armv7hf || - echo ':armv7hf:M::\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x28\x00:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/bin/qemu-arm-static:' > /proc/sys/fs/binfmt_misc/register; - -test -f /proc/sys/fs/binfmt_misc/aarch64 || - echo ':aarch64:M::\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\xb7:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/bin/qemu-aarch64-static:' > /proc/sys/fs/binfmt_misc/register - -test -f /proc/sys/fs/binfmt_misc/ppc64le || - echo 
':ppc64le:M::\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x15\x00:\xff\xff\xff\xff\xff\xff\xff\xfc\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\x00:/usr/bin/qemu-ppc64le-static:' > /proc/sys/fs/binfmt_misc/register - -test -f /proc/sys/fs/binfmt_misc/s390x || - echo ':s390x:M::\x7fELF\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x16:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/bin/qemu-s390x-static:' > /proc/sys/fs/binfmt_misc/register diff --git a/scripts/build/extract-deb-pkg b/scripts/build/extract-deb-pkg deleted file mode 100755 index 44457bc5a4..0000000000 --- a/scripts/build/extract-deb-pkg +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/bash - -set -e -set -u -set -o pipefail -MIRROR="https://mirrors.kernel.org/ubuntu" -PKGS="$MIRROR/dists/bionic/universe/binary-amd64/Packages.gz" - -if [ $# -ne 1 ]; then - echo "Usage: $0 package-name" 1>&2 - exit 1 -fi - -if [ -d "$1" ]; then - echo "Directory $1 already exists -- exiting" - exit 0 -fi - -if ! 
pkg=$(curl -sSL "$PKGS" | zgrep "Filename.*$1" | awk '{ print $2 }'); then - echo "ERROR: no packages matching $1" 1>&2 - exit 1 -fi - -if [ "$(wc -w <<< "$pkg")" -gt 1 ]; then - echo "$pkg" 1>&2 - echo "ERROR: more than one match for $1" 1>&2 - exit 1 -fi - -mkdir "$1" -cd "$1" - -wget "$MIRROR/$pkg" -pkg=$(basename "$pkg") -ar vx "$pkg" -tar xJvf data.tar.xz diff --git a/scripts/travis/travis-tests b/scripts/travis/travis-tests index 07311511c6..bc97fd4559 100755 --- a/scripts/travis/travis-tests +++ b/scripts/travis/travis-tests @@ -51,13 +51,6 @@ travis_prep () { CC="ccache $CC" fi - # The /etc/apt/sources.list in the current trusty image for ppc64le is - # broken and needs to be fixed - if [ "$TR_ARCH" = "ppc64le" ] ; then - sed -i '/security/ d' /etc/apt/sources.list - fi - - # Do not install x86_64 specific packages on other architectures if [ "$UNAME_M" = "x86_64" ]; then TRAVIS_PKGS="$TRAVIS_PKGS $X86_64_PKGS" From 025f8d9f284eb508841da20e2351c0d0c0dd77a2 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 13 Nov 2019 13:25:30 +0100 Subject: [PATCH 124/277] travis: remove group from .travis.yml Tests are successful even after removing 'group:' from .travis.yml. Apparently it is not necessary. Signed-off-by: Adrian Reber --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 3c760d08aa..f6f71be48c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -82,4 +82,3 @@ script: after_success: - ccache -s - make -C scripts/travis after_success -group: deprecated-2017Q2 From 05b078f62b6a78a05aedcd29bcc8ebfe95162415 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:34 +0000 Subject: [PATCH 125/277] lib/ptrace: Allow PTRACE_PEEKDATA with errno != 0 >From man ptrace: > On error, all requests return -1, and errno is set appropriately. 
> Since the value returned by a successful PTRACE_PEEK* request may be > -1, the caller must clear errno before the call, and then check > it afterward to determine whether or not an error occurred. FWIW: if ptrace_peek_area() is called with (errno != 0) it may false-fail if the data is (-1). Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- compel/src/lib/ptrace.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/compel/src/lib/ptrace.c b/compel/src/lib/ptrace.c index 9142bac421..715e564df8 100644 --- a/compel/src/lib/ptrace.c +++ b/compel/src/lib/ptrace.c @@ -34,14 +34,20 @@ int ptrace_suspend_seccomp(pid_t pid) int ptrace_peek_area(pid_t pid, void *dst, void *addr, long bytes) { unsigned long w; + int old_errno = errno; + if (bytes & (sizeof(long) - 1)) return -1; + + errno = 0; for (w = 0; w < bytes / sizeof(long); w++) { unsigned long *d = dst, *a = addr; + d[w] = ptrace(PTRACE_PEEKDATA, pid, a + w, NULL); if (d[w] == -1U && errno) goto err; } + errno = old_errno; return 0; err: return -2; From c19c153644ad6fff19c9628f56e63aba7a8ea4a5 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:35 +0000 Subject: [PATCH 126/277] lib/ptrace: Be more elaborate about failures Also, don't use the magic -2 => return errno on failure. 
Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- compel/src/lib/ptrace.c | 46 ++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/compel/src/lib/ptrace.c b/compel/src/lib/ptrace.c index 715e564df8..4c3530c853 100644 --- a/compel/src/lib/ptrace.c +++ b/compel/src/lib/ptrace.c @@ -36,50 +36,72 @@ int ptrace_peek_area(pid_t pid, void *dst, void *addr, long bytes) unsigned long w; int old_errno = errno; - if (bytes & (sizeof(long) - 1)) + if (bytes & (sizeof(long) - 1)) { + pr_err("Peek request with non-word size %ld\n", bytes); return -1; + } errno = 0; for (w = 0; w < bytes / sizeof(long); w++) { unsigned long *d = dst, *a = addr; d[w] = ptrace(PTRACE_PEEKDATA, pid, a + w, NULL); - if (d[w] == -1U && errno) + if (d[w] == -1U && errno) { + pr_perror("PEEKDATA failed"); goto err; + } } errno = old_errno; return 0; err: - return -2; + return -errno; } int ptrace_poke_area(pid_t pid, void *src, void *addr, long bytes) { unsigned long w; - if (bytes & (sizeof(long) - 1)) + + if (bytes & (sizeof(long) - 1)) { + pr_err("Poke request with non-word size %ld\n", bytes); return -1; + } + for (w = 0; w < bytes / sizeof(long); w++) { unsigned long *s = src, *a = addr; - if (ptrace(PTRACE_POKEDATA, pid, a + w, s[w])) + + if (ptrace(PTRACE_POKEDATA, pid, a + w, s[w])) { + pr_perror("POKEDATA failed"); goto err; + } } return 0; err: - return -2; + return -errno; } /* don't swap big space, it might overflow the stack */ int ptrace_swap_area(pid_t pid, void *dst, void *src, long bytes) { void *t = alloca(bytes); + int err; - if (ptrace_peek_area(pid, t, dst, bytes)) - return -1; + err = ptrace_peek_area(pid, t, dst, bytes); + if (err) + return err; - if (ptrace_poke_area(pid, src, dst, bytes)) { - if (ptrace_poke_area(pid, t, dst, bytes)) - return -2; - return -1; + err = ptrace_poke_area(pid, src, dst, bytes); + if (err) { + int err2; + + pr_err("Can't poke %d @ %p from %p sized %ld\n", + pid, dst, src, bytes); + + 
err2 = ptrace_poke_area(pid, t, dst, bytes); + if (err2) { + pr_err("Can't restore the original data with poke\n"); + return err2; + } + return err; } memcpy(src, t, bytes); From 038f3eba861b4fc67f117ee92743398a8b633df4 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:36 +0000 Subject: [PATCH 127/277] compel/infect: Warn if close() failed on memfd As a preparation for __must_check on compel_syscall(), check it on close() too - maybe not as useful as with other syscalls, but why not. Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- compel/src/lib/infect.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/compel/src/lib/infect.c b/compel/src/lib/infect.c index f0bcaf334f..f726a9895b 100644 --- a/compel/src/lib/infect.c +++ b/compel/src/lib/infect.c @@ -718,14 +718,25 @@ static int parasite_mmap_exchange(struct parasite_ctl *ctl, unsigned long size) return 0; } +static void parasite_memfd_close(struct parasite_ctl *ctl, int fd) +{ + bool __maybe_unused compat = !compel_mode_native(ctl); + long ret; + int err; + + err = compel_syscall(ctl, __NR(close, compat), &ret, fd, 0, 0, 0, 0, 0); + if (err || ret) + pr_err("Can't close memfd\n"); +} + static int parasite_memfd_exchange(struct parasite_ctl *ctl, unsigned long size) { void *where = (void *)ctl->ictx.syscall_ip + BUILTIN_SYSCALL_SIZE; + bool __maybe_unused compat_task = !compel_mode_native(ctl); uint8_t orig_code[MEMFD_FNAME_SZ] = MEMFD_FNAME; pid_t pid = ctl->rpid; long sret = -ENOSYS; int ret, fd, lfd; - bool __maybe_unused compat_task = !compel_mode_native(ctl); if (ctl->ictx.flags & INFECT_NO_MEMFD) return 1; @@ -741,10 +752,9 @@ static int parasite_memfd_exchange(struct parasite_ctl *ctl, unsigned long size) (unsigned long)where, 0, 0, 0, 0, 0); if (ptrace_poke_area(pid, orig_code, where, sizeof(orig_code))) { - fd = (int)(long)sret; + fd = (int)sret; if (fd >= 0) - compel_syscall(ctl, __NR(close, compat_task), &sret, - fd, 0, 0, 0, 
0, 0); + parasite_memfd_close(ctl, fd); pr_err("Can't restore memfd args (pid: %d)\n", pid); return -1; } @@ -752,7 +762,7 @@ static int parasite_memfd_exchange(struct parasite_ctl *ctl, unsigned long size) if (ret < 0) return ret; - fd = (int)(long)sret; + fd = (int)sret; if (fd == -ENOSYS) return 1; if (fd < 0) { @@ -787,7 +797,7 @@ static int parasite_memfd_exchange(struct parasite_ctl *ctl, unsigned long size) goto err_curef; } - compel_syscall(ctl, __NR(close, compat_task), &sret, fd, 0, 0, 0, 0, 0); + parasite_memfd_close(ctl, fd); close(lfd); pr_info("Set up parasite blob using memfd\n"); @@ -796,7 +806,7 @@ static int parasite_memfd_exchange(struct parasite_ctl *ctl, unsigned long size) err_curef: close(lfd); err_cure: - compel_syscall(ctl, __NR(close, compat_task), &sret, fd, 0, 0, 0, 0, 0); + parasite_memfd_close(ctl, fd); return -1; } From eda9a113ed619e1dde8456ab748164d8d7f5b43c Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:37 +0000 Subject: [PATCH 128/277] compel: Mark compat argument of __NR() as used And remove __maybe_unused work-around. 
Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- compel/arch/aarch64/src/lib/include/syscall.h | 2 +- compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h | 2 +- compel/arch/arm/src/lib/include/syscall.h | 2 +- compel/arch/arm/src/lib/include/uapi/asm/infect-types.h | 2 +- compel/arch/ppc64/src/lib/include/syscall.h | 2 +- compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h | 2 +- compel/arch/s390/src/lib/include/uapi/asm/infect-types.h | 2 +- compel/src/lib/infect.c | 4 ++-- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/compel/arch/aarch64/src/lib/include/syscall.h b/compel/arch/aarch64/src/lib/include/syscall.h index e2ec1272ec..30290667af 100644 --- a/compel/arch/aarch64/src/lib/include/syscall.h +++ b/compel/arch/aarch64/src/lib/include/syscall.h @@ -1,4 +1,4 @@ #ifndef __COMPEL_SYSCALL_H__ #define __COMPEL_SYSCALL_H__ -#define __NR(syscall, compat) __NR_##syscall +#define __NR(syscall, compat) ({ (void)compat; __NR_##syscall; }) #endif diff --git a/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h b/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h index 4662f76897..7a33baa8ef 100644 --- a/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h +++ b/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h @@ -27,6 +27,6 @@ typedef struct user_fpsimd_state user_fpregs_struct_t; #define ARCH_SI_TRAP TRAP_BRKPT -#define __NR(syscall, compat) __NR_##syscall +#define __NR(syscall, compat) ({ (void)compat; __NR_##syscall; }) #endif /* UAPI_COMPEL_ASM_TYPES_H__ */ diff --git a/compel/arch/arm/src/lib/include/syscall.h b/compel/arch/arm/src/lib/include/syscall.h index e2ec1272ec..30290667af 100644 --- a/compel/arch/arm/src/lib/include/syscall.h +++ b/compel/arch/arm/src/lib/include/syscall.h @@ -1,4 +1,4 @@ #ifndef __COMPEL_SYSCALL_H__ #define __COMPEL_SYSCALL_H__ -#define __NR(syscall, compat) __NR_##syscall +#define __NR(syscall, compat) ({ (void)compat; __NR_##syscall; }) #endif diff --git 
a/compel/arch/arm/src/lib/include/uapi/asm/infect-types.h b/compel/arch/arm/src/lib/include/uapi/asm/infect-types.h index b8286d4049..69222b251f 100644 --- a/compel/arch/arm/src/lib/include/uapi/asm/infect-types.h +++ b/compel/arch/arm/src/lib/include/uapi/asm/infect-types.h @@ -61,6 +61,6 @@ struct user_vfp_exc { #define ARCH_SI_TRAP TRAP_BRKPT -#define __NR(syscall, compat) __NR_##syscall +#define __NR(syscall, compat) ({ (void)compat; __NR_##syscall; }) #endif /* UAPI_COMPEL_ASM_TYPES_H__ */ diff --git a/compel/arch/ppc64/src/lib/include/syscall.h b/compel/arch/ppc64/src/lib/include/syscall.h index e2ec1272ec..30290667af 100644 --- a/compel/arch/ppc64/src/lib/include/syscall.h +++ b/compel/arch/ppc64/src/lib/include/syscall.h @@ -1,4 +1,4 @@ #ifndef __COMPEL_SYSCALL_H__ #define __COMPEL_SYSCALL_H__ -#define __NR(syscall, compat) __NR_##syscall +#define __NR(syscall, compat) ({ (void)compat; __NR_##syscall; }) #endif diff --git a/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h b/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h index 89fc4aa3c4..126fa2ea31 100644 --- a/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h +++ b/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h @@ -81,6 +81,6 @@ typedef struct { #define ARCH_SI_TRAP TRAP_BRKPT -#define __NR(syscall, compat) __NR_##syscall +#define __NR(syscall, compat) ({ (void)compat; __NR_##syscall; }) #endif /* UAPI_COMPEL_ASM_TYPES_H__ */ diff --git a/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h b/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h index fddf65d3b0..8171d33951 100644 --- a/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h +++ b/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h @@ -73,7 +73,7 @@ typedef struct { #define user_regs_native(pregs) true -#define __NR(syscall, compat) __NR_##syscall +#define __NR(syscall, compat) ({ (void)compat; __NR_##syscall; }) struct mmap_arg_struct { unsigned long addr; diff --git 
a/compel/src/lib/infect.c b/compel/src/lib/infect.c index f726a9895b..656cc030d4 100644 --- a/compel/src/lib/infect.c +++ b/compel/src/lib/infect.c @@ -720,7 +720,7 @@ static int parasite_mmap_exchange(struct parasite_ctl *ctl, unsigned long size) static void parasite_memfd_close(struct parasite_ctl *ctl, int fd) { - bool __maybe_unused compat = !compel_mode_native(ctl); + bool compat = !compel_mode_native(ctl); long ret; int err; @@ -732,7 +732,7 @@ static void parasite_memfd_close(struct parasite_ctl *ctl, int fd) static int parasite_memfd_exchange(struct parasite_ctl *ctl, unsigned long size) { void *where = (void *)ctl->ictx.syscall_ip + BUILTIN_SYSCALL_SIZE; - bool __maybe_unused compat_task = !compel_mode_native(ctl); + bool compat_task = !compel_mode_native(ctl); uint8_t orig_code[MEMFD_FNAME_SZ] = MEMFD_FNAME; pid_t pid = ctl->rpid; long sret = -ENOSYS; From 0020f2a0253df4573a7055f54a410602c588a6cd Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:38 +0000 Subject: [PATCH 129/277] lib/infect: Check if compel succeed in executing munmap Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- compel/arch/s390/src/lib/infect.c | 4 +++- compel/src/lib/infect.c | 10 +++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/compel/arch/s390/src/lib/infect.c b/compel/arch/s390/src/lib/infect.c index 00e9c36d21..7e7d24ce21 100644 --- a/compel/arch/s390/src/lib/infect.c +++ b/compel/arch/s390/src/lib/infect.c @@ -453,8 +453,10 @@ void *remote_mmap(struct parasite_ctl *ctl, if (ptrace_poke_area(pid, &arg_struct, where, sizeof(arg_struct))) { pr_err("Can't restore mmap args (pid: %d)\n", pid); if (map != 0) { - compel_syscall(ctl, __NR_munmap, NULL, map, + err = compel_syscall(ctl, __NR_munmap, NULL, map, length, 0, 0, 0, 0); + if (err) + pr_err("Can't munmap %d\n", err); map = 0; } } diff --git a/compel/src/lib/infect.c b/compel/src/lib/infect.c index 656cc030d4..8b377e7d22 100644 --- a/compel/src/lib/infect.c +++ 
b/compel/src/lib/infect.c @@ -1303,6 +1303,7 @@ int compel_stop_daemon(struct parasite_ctl *ctl) int compel_cure_remote(struct parasite_ctl *ctl) { long ret; + int err; if (compel_stop_daemon(ctl)) return -1; @@ -1310,9 +1311,12 @@ int compel_cure_remote(struct parasite_ctl *ctl) if (!ctl->remote_map) return 0; - compel_syscall(ctl, __NR(munmap, !compel_mode_native(ctl)), &ret, - (unsigned long)ctl->remote_map, ctl->map_length, - 0, 0, 0, 0); + err = compel_syscall(ctl, __NR(munmap, !compel_mode_native(ctl)), &ret, + (unsigned long)ctl->remote_map, ctl->map_length, + 0, 0, 0, 0); + if (err) + return err; + if (ret) { pr_err("munmap for remote map %p, %lu returned %lu\n", ctl->remote_map, ctl->map_length, ret); From 0998ac6af71f68558db446bc5354fe246f0cb93a Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:39 +0000 Subject: [PATCH 130/277] cr-dump: Try to cure remote on err-paths On daemon stop or threads dump failures it's still desired to remove parasite from the remote (if possible). Try best and keep hoping. Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- criu/cr-dump.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/criu/cr-dump.c b/criu/cr-dump.c index 8aabb85b11..d8cc4f915e 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -1386,16 +1386,20 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie) ret = compel_stop_daemon(parasite_ctl); if (ret) { - pr_err("Can't cure (pid: %d) from parasite\n", pid); - goto err; + pr_err("Can't stop daemon in parasite (pid: %d)\n", pid); + goto err_cure; } ret = dump_task_threads(parasite_ctl, item); if (ret) { pr_err("Can't dump threads\n"); - goto err; + goto err_cure; } + /* + * On failure local map will be cured in cr_dump_finish() + * for lazy pages.
+ */ if (opts.lazy_pages) ret = compel_cure_remote(parasite_ctl); else @@ -1428,7 +1432,9 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie) err_cure: close_cr_imgset(&cr_imgset); err_cure_imgset: - compel_cure(parasite_ctl); + ret = compel_cure(parasite_ctl); + if (ret) + pr_err("Can't cure (pid: %d) from parasite\n", pid); goto err; } From 341d781c35ad1e23b127bc942c7812064ccd1a27 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:40 +0000 Subject: [PATCH 131/277] cr-dump: Warn if unmapping local memfd failed Probably, not the worst that could happen, but still unexpected. Preparing the ground to make compel_cure*() functions __must_check. Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- criu/cr-dump.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/criu/cr-dump.c b/criu/cr-dump.c index d8cc4f915e..6bdd28400d 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -1538,7 +1538,8 @@ static int cr_pre_dump_finish(int status) timing_stop(TIME_MEMWRITE); destroy_page_pipe(mem_pp); - compel_cure_local(ctl); + if (compel_cure_local(ctl)) + pr_err("Can't cure local: something happened with mapping?\n"); } free_pstree(root_item); @@ -1670,7 +1671,8 @@ static int cr_lazy_mem_dump(void) for_each_pstree_item(item) { if (item->pid->state != TASK_DEAD) { destroy_page_pipe(dmpi(item)->mem_pp); - compel_cure_local(dmpi(item)->parasite_ctl); + if (compel_cure_local(dmpi(item)->parasite_ctl)) + pr_err("Can't cure local: something happened with mapping?\n"); } } From 0a4d747cac51c42ab7893fce9357265a45739c2d Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:41 +0000 Subject: [PATCH 132/277] cr-restore: Warn if restorer can't be unmapped Too late to stop restore: it's already printed that restore was successful. Oh, well warn aloud about infection. 
Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- criu/cr-restore.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index de0b2cb407..41f78cb7a2 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -1953,6 +1953,7 @@ static void finalize_restore(void) for_each_pstree_item(item) { pid_t pid = item->pid->real; struct parasite_ctl *ctl; + unsigned long restorer_addr; if (!task_alive(item)) continue; @@ -1962,7 +1963,9 @@ static void finalize_restore(void) if (ctl == NULL) continue; - compel_unmap(ctl, (unsigned long)rsti(item)->munmap_restorer); + restorer_addr = (unsigned long)rsti(item)->munmap_restorer; + if (compel_unmap(ctl, restorer_addr)) + pr_err("Failed to unmap restorer from %d\n", pid); xfree(ctl); From 281c9f330e13fa90b04ebbbe01ca3a9c2ae72dfe Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:42 +0000 Subject: [PATCH 133/277] parasite-syscall: Log if can't cure on failed infection Maybe expected, hopefully never happens - let's warn in any case. Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- criu/parasite-syscall.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/criu/parasite-syscall.c b/criu/parasite-syscall.c index b9788a4c28..e5a8194e58 100644 --- a/criu/parasite-syscall.c +++ b/criu/parasite-syscall.c @@ -565,7 +565,8 @@ struct parasite_ctl *parasite_infect_seized(pid_t pid, struct pstree_item *item, parasite_ensure_args_size(aio_rings_args_size(vma_area_list)); if (compel_infect(ctl, item->nr_threads, parasite_args_size) < 0) { - compel_cure(ctl); + if (compel_cure(ctl)) + pr_warn("Can't cure failed infection\n"); return NULL; } From 9e998ad33b03a1671a42f4d15335cba7655ebe1f Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:43 +0000 Subject: [PATCH 134/277] compel/infect: Detach but fail compel_resume_task() Unknown state means that the task in the end may be not in wanted state. Return err code. 
Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- compel/src/lib/infect.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/compel/src/lib/infect.c b/compel/src/lib/infect.c index 8b377e7d22..3fad85ed3c 100644 --- a/compel/src/lib/infect.c +++ b/compel/src/lib/infect.c @@ -313,6 +313,8 @@ int compel_wait_task(int pid, int ppid, int compel_resume_task(pid_t pid, int orig_st, int st) { + int ret = 0; + pr_debug("\tUnseizing %d into %d\n", pid, st); if (st == COMPEL_TASK_DEAD) { @@ -335,15 +337,17 @@ int compel_resume_task(pid_t pid, int orig_st, int st) */ if (orig_st == COMPEL_TASK_STOPPED) kill(pid, SIGSTOP); - } else + } else { pr_err("Unknown final state %d\n", st); + ret = -1; + } if (ptrace(PTRACE_DETACH, pid, NULL, NULL)) { pr_perror("Unable to detach from %d", pid); return -1; } - return 0; + return ret; } static int gen_parasite_saddr(struct sockaddr_un *saddr, int key) From 674bd13d2c957c8e727ae45d877a021924fe53fb Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:44 +0000 Subject: [PATCH 135/277] criu: Kill tasks even when the network is unlocked Currently if anything fails after network has been unlocked tasks aren't killed. Which doesn't work anyway: any stage sets `ret` and nothing later gets called. Which means the tasks aren't resumed properly. Furthermore, functions like catch_tasks() and compel_stop_on_syscall() return failure on the first error. Let's do the cleanup even when the network is unlocked. If we want to keep the mess and ignore failures - a cli option should be introduced for that (and existing code should be reworked with decisions what is critical and what can be ignored). Move "Restore finished successfully" message accordingly where everything is evidently good. While at here, any late failure will result not only in cleanup but in criu returning error code. 
Which in result makes tests to fail in such case: > ======================= Run zdtm/static/inotify04 in ns ======================== > Start test > ./inotify04 --pidfile=inotify04.pid --outfile=inotify04.out --dirname=inotify04.test > Run criu dump > =[log]=> dump/zdtm/static/inotify04/84/1/dump.log > ------------------------ grep Error ------------------------ > (00.119763) fsnotify: openable (inode match) as zdtm/static/inotify04.test/inotify-testfile > (00.119766) fsnotify: Dumping /zdtm/static/inotify04.test/inotify-testfile as path for handle > (00.119769) fsnotify: id 0x00000b flags 0x000800 > (00.119787) 88 fdinfo 5: pos: 0 flags: 4000/0 > (00.119796) Warn (criu/fsnotify.c:336): fsnotify: The 0x00000c inotify events will be dropped > ------------------------ ERROR OVER ------------------------ > Run criu restore > =[log]=> dump/zdtm/static/inotify04/84/1/restore.log > ------------------------ grep Error ------------------------ > (00.391582) 123 was stopped > (00.391667) 106 was trapped > (00.391674) 106 (native) is going to execute the syscall 11, required is 11 > (00.391697) 106 was stopped > (00.391720) Error (compel/src/lib/infect.c:1439): Task 123 is in unexpected state: b7f > (00.391736) Error (compel/src/lib/infect.c:1447): Task stopped with 11: Segmentation fault > ------------------------ ERROR OVER ------------------------ > 5: Old maps lost: set([]) > 5: New maps appeared: set([u'10000-1a000 rwxp', u'1a000-24000 rw-p']) > ############### Test zdtm/static/inotify04 FAIL at maps compare ################ > Send the 9 signal to 106 > Wait for zdtm/static/inotify04(106) to die for 0.100000 > ======================= Test zdtm/static/inotify04 PASS ======================== Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- criu/cr-restore.c | 50 ++++++++++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 41f78cb7a2..953f28e458 100644 --- 
a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -1975,7 +1975,7 @@ static void finalize_restore(void) } } -static void finalize_restore_detach(int status) +static int finalize_restore_detach(void) { struct pstree_item *item; @@ -1989,16 +1989,21 @@ static void finalize_restore_detach(int status) for (i = 0; i < item->nr_threads; i++) { pid = item->threads[i].real; if (pid < 0) { - BUG_ON(status >= 0); - break; + pr_err("pstree item has unvalid pid %d\n", pid); + continue; } - if (arch_set_thread_regs_nosigrt(&item->threads[i])) + if (arch_set_thread_regs_nosigrt(&item->threads[i])) { pr_perror("Restoring regs for %d failed", pid); - if (ptrace(PTRACE_DETACH, pid, NULL, 0)) - pr_perror("Unable to execute %d", pid); + return -1; + } + if (ptrace(PTRACE_DETACH, pid, NULL, 0)) { + pr_perror("Unable to detach %d", pid); + return -1; + } } } + return 0; } static void ignore_kids(void) @@ -2256,32 +2261,37 @@ static int restore_root_task(struct pstree_item *init) /* * ------------------------------------------------------------- - * Below this line nothing should fail, because network is unlocked + * Network is unlocked. If something fails below - we lose data + * or a connection. */ attach_to_tasks(root_seized); - ret = restore_switch_stage(CR_STATE_RESTORE_CREDS); - BUG_ON(ret); + if (restore_switch_stage(CR_STATE_RESTORE_CREDS)) + goto out_kill_network_unlocked; timing_stop(TIME_RESTORE); - ret = catch_tasks(root_seized, &flag); + if (catch_tasks(root_seized, &flag)) { + pr_err("Can't catch all tasks\n"); + goto out_kill_network_unlocked; + } if (lazy_pages_finish_restore()) - goto out_kill; + goto out_kill_network_unlocked; - pr_info("Restore finished successfully. 
Resuming tasks.\n"); __restore_switch_stage(CR_STATE_COMPLETE); - if (ret == 0) - ret = compel_stop_on_syscall(task_entries->nr_threads, - __NR(rt_sigreturn, 0), __NR(rt_sigreturn, 1), flag); + ret = compel_stop_on_syscall(task_entries->nr_threads, + __NR(rt_sigreturn, 0), __NR(rt_sigreturn, 1), flag); + if (ret) { + pr_err("Can't stop all tasks on rt_sigreturn\n"); + goto out_kill_network_unlocked; + } if (clear_breakpoints()) pr_err("Unable to flush breakpoints\n"); - if (ret == 0) - finalize_restore(); + finalize_restore(); ret = run_scripts(ACT_PRE_RESUME); if (ret) @@ -2293,8 +2303,10 @@ static int restore_root_task(struct pstree_item *init) fini_cgroup(); /* Detaches from processes and they continue run through sigreturn. */ - finalize_restore_detach(ret); + if (finalize_restore_detach()) + goto out_kill_network_unlocked; + pr_info("Restore finished successfully. Tasks resumed.\n"); write_stats(RESTORE_STATS); ret = run_scripts(ACT_POST_RESUME); @@ -2306,6 +2318,8 @@ static int restore_root_task(struct pstree_item *init) return 0; +out_kill_network_unlocked: + pr_err("Killing processes because of failure on restore.\nThe Network was unlocked so some data or a connection may have been lost.\n"); out_kill: /* * The processes can be killed only when all of them have been created, From bcbe0af0fdd4472700b01b680e0ba82eb9b3eebf Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:45 +0000 Subject: [PATCH 136/277] compel/criu: Add __must_check All those compel functions can fail by various reasons. It may be status of the system, interruption by user or anything else. It's really desired to handle as many PIE related errors as possible otherwise it's hard to analyze statuses of parasite/restorer and the C/R process. At least warning for logs should be produced or even C/R stopped. 
Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- compel/include/uapi/cpu.h | 2 +- compel/include/uapi/infect-rpc.h | 6 ++-- compel/include/uapi/infect-util.h | 5 ++- compel/include/uapi/infect.h | 39 +++++++++++++----------- compel/include/uapi/ptrace.h | 7 +++-- compel/include/uapi/sigframe-common.h | 5 +-- compel/plugins/include/uapi/plugin-fds.h | 2 +- compel/plugins/include/uapi/std/infect.h | 8 +++-- compel/plugins/include/uapi/std/log.h | 1 + criu/seize.c | 2 +- include/common/compiler.h | 27 ++++++++++++++++ 11 files changed, 71 insertions(+), 33 deletions(-) diff --git a/compel/include/uapi/cpu.h b/compel/include/uapi/cpu.h index 6f827d4472..72c8a516c2 100644 --- a/compel/include/uapi/cpu.h +++ b/compel/include/uapi/cpu.h @@ -6,7 +6,7 @@ #include -extern int compel_cpuid(compel_cpuinfo_t *info); +extern int /* TODO: __must_check */ compel_cpuid(compel_cpuinfo_t *info); extern bool compel_cpu_has_feature(unsigned int feature); extern bool compel_fpu_has_feature(unsigned int feature); extern uint32_t compel_fpu_feature_size(unsigned int feature); diff --git a/compel/include/uapi/infect-rpc.h b/compel/include/uapi/infect-rpc.h index 0176c11425..180dedf1f6 100644 --- a/compel/include/uapi/infect-rpc.h +++ b/compel/include/uapi/infect-rpc.h @@ -6,9 +6,9 @@ #include struct parasite_ctl; -extern int compel_rpc_sync(unsigned int cmd, struct parasite_ctl *ctl); -extern int compel_rpc_call(unsigned int cmd, struct parasite_ctl *ctl); -extern int compel_rpc_call_sync(unsigned int cmd, struct parasite_ctl *ctl); +extern int __must_check compel_rpc_sync(unsigned int cmd, struct parasite_ctl *ctl); +extern int __must_check compel_rpc_call(unsigned int cmd, struct parasite_ctl *ctl); +extern int __must_check compel_rpc_call_sync(unsigned int cmd, struct parasite_ctl *ctl); extern int compel_rpc_sock(struct parasite_ctl *ctl); #define PARASITE_USER_CMDS 64 diff --git a/compel/include/uapi/infect-util.h b/compel/include/uapi/infect-util.h index 
7307ba57a0..4e32d13dc4 100644 --- a/compel/include/uapi/infect-util.h +++ b/compel/include/uapi/infect-util.h @@ -1,6 +1,9 @@ #ifndef __COMPEL_INFECT_UTIL_H__ #define __COMPEL_INFECT_UTIL_H__ + +#include "common/compiler.h" + struct parasite_ctl; -extern int compel_util_send_fd(struct parasite_ctl *ctl, int fd); +extern int __must_check compel_util_send_fd(struct parasite_ctl *ctl, int fd); extern int compel_util_recv_fd(struct parasite_ctl *ctl, int *pfd); #endif diff --git a/compel/include/uapi/infect.h b/compel/include/uapi/infect.h index 08beaffcdf..dd672bc1c9 100644 --- a/compel/include/uapi/infect.h +++ b/compel/include/uapi/infect.h @@ -13,7 +13,7 @@ #define PARASITE_START_AREA_MIN (4096) -extern int compel_interrupt_task(int pid); +extern int __must_check compel_interrupt_task(int pid); struct seize_task_status { unsigned long long sigpnd; @@ -23,27 +23,28 @@ struct seize_task_status { int seccomp_mode; }; -extern int compel_wait_task(int pid, int ppid, +extern int __must_check compel_wait_task(int pid, int ppid, int (*get_status)(int pid, struct seize_task_status *, void *data), void (*free_status)(int pid, struct seize_task_status *, void *data), struct seize_task_status *st, void *data); -extern int compel_stop_task(int pid); +extern int __must_check compel_stop_task(int pid); extern int compel_resume_task(pid_t pid, int orig_state, int state); struct parasite_ctl; struct parasite_thread_ctl; -extern struct parasite_ctl *compel_prepare(int pid); -extern struct parasite_ctl *compel_prepare_noctx(int pid); -extern int compel_infect(struct parasite_ctl *ctl, unsigned long nr_threads, unsigned long args_size); -extern struct parasite_thread_ctl *compel_prepare_thread(struct parasite_ctl *ctl, int pid); +extern struct parasite_ctl __must_check *compel_prepare(int pid); +extern struct parasite_ctl __must_check *compel_prepare_noctx(int pid); +extern int __must_check compel_infect(struct parasite_ctl *ctl, + unsigned long nr_threads, unsigned long args_size); 
+extern struct parasite_thread_ctl __must_check *compel_prepare_thread(struct parasite_ctl *ctl, int pid); extern void compel_release_thread(struct parasite_thread_ctl *); -extern int compel_stop_daemon(struct parasite_ctl *ctl); -extern int compel_cure_remote(struct parasite_ctl *ctl); -extern int compel_cure_local(struct parasite_ctl *ctl); -extern int compel_cure(struct parasite_ctl *ctl); +extern int __must_check compel_stop_daemon(struct parasite_ctl *ctl); +extern int __must_check compel_cure_remote(struct parasite_ctl *ctl); +extern int __must_check compel_cure_local(struct parasite_ctl *ctl); +extern int __must_check compel_cure(struct parasite_ctl *ctl); #define PARASITE_ARG_SIZE_MIN ( 1 << 12) @@ -58,15 +59,16 @@ extern int compel_cure(struct parasite_ctl *ctl); extern void *compel_parasite_args_p(struct parasite_ctl *ctl); extern void *compel_parasite_args_s(struct parasite_ctl *ctl, unsigned long args_size); -extern int compel_syscall(struct parasite_ctl *ctl, int nr, long *ret, +extern int __must_check compel_syscall(struct parasite_ctl *ctl, + int nr, long *ret, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5, unsigned long arg6); -extern int compel_run_in_thread(struct parasite_thread_ctl *tctl, unsigned int cmd); -extern int compel_run_at(struct parasite_ctl *ctl, unsigned long ip, user_regs_struct_t *ret_regs); +extern int __must_check compel_run_in_thread(struct parasite_thread_ctl *tctl, unsigned int cmd); +extern int __must_check compel_run_at(struct parasite_ctl *ctl, unsigned long ip, user_regs_struct_t *ret_regs); /* * The PTRACE_SYSCALL will trap task twice -- on @@ -80,12 +82,13 @@ enum trace_flags { TRACE_EXIT, }; -extern int compel_stop_on_syscall(int tasks, int sys_nr, +extern int __must_check compel_stop_on_syscall(int tasks, int sys_nr, int sys_nr_compat, enum trace_flags trace); -extern int compel_stop_pie(pid_t pid, void *addr, enum trace_flags *tf, bool no_bp); +extern int 
__must_check compel_stop_pie(pid_t pid, void *addr, + enum trace_flags *tf, bool no_bp); -extern int compel_unmap(struct parasite_ctl *ctl, unsigned long addr); +extern int __must_check compel_unmap(struct parasite_ctl *ctl, unsigned long addr); extern int compel_mode_native(struct parasite_ctl *ctl); @@ -159,7 +162,7 @@ struct parasite_blob_desc { extern struct parasite_blob_desc *compel_parasite_blob_desc(struct parasite_ctl *); -extern int compel_get_thread_regs(struct parasite_thread_ctl *, save_regs_t, void *); +extern int __must_check compel_get_thread_regs(struct parasite_thread_ctl *, save_regs_t, void *); extern void compel_relocs_apply(void *mem, void *vbase, size_t size, compel_reloc_t *elf_relocs, size_t nr_relocs); diff --git a/compel/include/uapi/ptrace.h b/compel/include/uapi/ptrace.h index 4df00b6e1b..13eed72328 100644 --- a/compel/include/uapi/ptrace.h +++ b/compel/include/uapi/ptrace.h @@ -1,6 +1,7 @@ #ifndef UAPI_COMPEL_PTRACE_H__ #define UAPI_COMPEL_PTRACE_H__ +#include "common/compiler.h" /* * We'd want to include both sys/ptrace.h and linux/ptrace.h, * hoping that most definitions come from either one or another. 
@@ -75,8 +76,8 @@ typedef struct { extern int ptrace_suspend_seccomp(pid_t pid); -extern int ptrace_peek_area(pid_t pid, void *dst, void *addr, long bytes); -extern int ptrace_poke_area(pid_t pid, void *src, void *addr, long bytes); -extern int ptrace_swap_area(pid_t pid, void *dst, void *src, long bytes); +extern int __must_check ptrace_peek_area(pid_t pid, void *dst, void *addr, long bytes); +extern int __must_check ptrace_poke_area(pid_t pid, void *src, void *addr, long bytes); +extern int __must_check ptrace_swap_area(pid_t pid, void *dst, void *src, long bytes); #endif /* UAPI_COMPEL_PTRACE_H__ */ diff --git a/compel/include/uapi/sigframe-common.h b/compel/include/uapi/sigframe-common.h index fc93c5480b..177bf4c48a 100644 --- a/compel/include/uapi/sigframe-common.h +++ b/compel/include/uapi/sigframe-common.h @@ -8,6 +8,7 @@ # error "Direct inclusion is forbidden, use instead" #endif +#include "common/compiler.h" #include #include @@ -56,7 +57,7 @@ struct rt_ucontext { unsigned long uc_regspace[128] __attribute__((aligned(8))); }; -extern int sigreturn_prep_fpu_frame(struct rt_sigframe *frame, - struct rt_sigframe *rframe); +extern int __must_check sigreturn_prep_fpu_frame(struct rt_sigframe *frame, + struct rt_sigframe *rframe); #endif /* UAPI_COMPEL_SIGFRAME_COMMON_H__ */ diff --git a/compel/plugins/include/uapi/plugin-fds.h b/compel/plugins/include/uapi/plugin-fds.h index cececb21d9..e995b4b66a 100644 --- a/compel/plugins/include/uapi/plugin-fds.h +++ b/compel/plugins/include/uapi/plugin-fds.h @@ -1,7 +1,7 @@ #ifndef COMPEL_PLUGIN_STD_STD_H__ #define COMPEL_PLUGIN_STD_STD_H__ -extern int fds_send_fd(int fd); +extern int __must_check fds_send_fd(int fd); extern int fds_recv_fd(void); #endif /* COMPEL_PLUGIN_STD_STD_H__ */ diff --git a/compel/plugins/include/uapi/std/infect.h b/compel/plugins/include/uapi/std/infect.h index 800df25095..1e784f8b43 100644 --- a/compel/plugins/include/uapi/std/infect.h +++ b/compel/plugins/include/uapi/std/infect.h @@ -1,14 +1,16 
@@ #ifndef COMPEL_PLUGIN_STD_INFECT_H__ #define COMPEL_PLUGIN_STD_INFECT_H__ +#include "common/compiler.h" + extern int parasite_get_rpc_sock(void); -extern int parasite_service(unsigned int cmd, void *args); +extern int __must_check parasite_service(unsigned int cmd, void *args); /* * Must be supplied by user plugins. */ -extern int parasite_daemon_cmd(int cmd, void *args); -extern int parasite_trap_cmd(int cmd, void *args); +extern int __must_check parasite_daemon_cmd(int cmd, void *args); +extern int __must_check parasite_trap_cmd(int cmd, void *args); extern void parasite_cleanup(void); /* diff --git a/compel/plugins/include/uapi/std/log.h b/compel/plugins/include/uapi/std/log.h index f21b6df0d9..91462c85b7 100644 --- a/compel/plugins/include/uapi/std/log.h +++ b/compel/plugins/include/uapi/std/log.h @@ -2,6 +2,7 @@ #define COMPEL_PLUGIN_STD_LOG_H__ #include "compel/loglevels.h" +#include "common/compiler.h" #define STD_LOG_SIMPLE_CHUNK 256 diff --git a/criu/seize.c b/criu/seize.c index cce8911b92..e1e6b81956 100644 --- a/criu/seize.c +++ b/criu/seize.c @@ -483,7 +483,7 @@ static int collect_children(struct pstree_item *item) if (!opts.freeze_cgroup) /* fails when meets a zombie */ - compel_interrupt_task(pid); + __ignore_value(compel_interrupt_task(pid)); ret = compel_wait_task(pid, item->pid->real, parse_pid_status, NULL, &creds.s, NULL); if (ret < 0) { diff --git a/include/common/compiler.h b/include/common/compiler.h index fc8abcfef4..1d431a5293 100644 --- a/include/common/compiler.h +++ b/include/common/compiler.h @@ -22,6 +22,7 @@ #define __used __attribute__((__used__)) #define __maybe_unused __attribute__((unused)) #define __always_unused __attribute__((unused)) +#define __must_check __attribute__((__warn_unused_result__)) #define __section(S) __attribute__ ((__section__(#S))) @@ -99,4 +100,30 @@ #define is_log2(v) (((v) & ((v) - 1)) == 0) +/* + * Use "__ignore_value" to avoid a warning when using a function declared with + * gcc's warn_unused_result 
attribute, but for which you really do want to + * ignore the result. Traditionally, people have used a "(void)" cast to + * indicate that a function's return value is deliberately unused. However, + * if the function is declared with __attribute__((warn_unused_result)), + * gcc issues a warning even with the cast. + * + * Caution: most of the time, you really should heed gcc's warning, and + * check the return value. However, in those exceptional cases in which + * you're sure you know what you're doing, use this function. + * + * Normally casting an expression to void discards its value, but GCC + * versions 3.4 and newer have __attribute__ ((__warn_unused_result__)) + * which may cause unwanted diagnostics in that case. Use __typeof__ + * and __extension__ to work around the problem, if the workaround is + * known to be needed. + * Written by Jim Meyering, Eric Blake and Pádraig Brady. + * (See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66425 for the details) + */ +#if 3 < __GNUC__ + (4 <= __GNUC_MINOR__) +# define __ignore_value(x) ({ __typeof__ (x) __x = (x); (void) __x; }) +#else +# define __ignore_value(x) ((void) (x)) +#endif + #endif /* __CR_COMPILER_H__ */ From 715a105aae6f2bfcb1bf8831845df79c959b0236 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20C=C5=82api=C5=84ski?= Date: Wed, 6 Nov 2019 02:15:20 +0100 Subject: [PATCH 137/277] Checkpoint only specified controllers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before this change CRIU would checkpoint all controllers, even the ones not specified in --cgroup-dump-controller. That becomes a problem if there's a cgroup controller on the checkpointing machine that doesn't exist on the restoring machine even if CRIU is instructed not to dump that controller. After that change everything works as expected. 
Signed-off-by: Michał Cłapiński --- criu/proc_parse.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/criu/proc_parse.c b/criu/proc_parse.c index d67392a120..fa7644992b 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -2498,6 +2498,12 @@ int collect_controllers(struct list_head *cgroups, unsigned int *n_cgroups) goto err; } *off = '\0'; + + if (cgp_should_skip_controller(controllers)) { + pr_debug("cg-prop: Skipping controller %s\n", controllers); + continue; + } + while (1) { off = strchr(controllers, ','); if (off) From 078efaef3755b805643b8056e02f62a72e3e08f9 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Sun, 17 Nov 2019 16:04:16 +0200 Subject: [PATCH 138/277] travis: group lazy-pages options The amount of lazy-pages options keeps growing, let's put the common ones into a variable. Signed-off-by: Mike Rapoport --- scripts/travis/travis-tests | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/travis/travis-tests b/scripts/travis/travis-tests index bc97fd4559..a87ddbaf46 100755 --- a/scripts/travis/travis-tests +++ b/scripts/travis/travis-tests @@ -146,10 +146,11 @@ fi LAZY_EXCLUDE="$LAZY_EXCLUDE -x maps04" LAZY_TESTS=.*\(maps0\|uffd-events\|lazy-thp\|futex\|fork\).* +LAZY_OPTS="-p 2 -T $LAZY_TESTS $LAZY_EXCLUDE $ZDTM_OPTS" -./test/zdtm.py run -p 2 -T $LAZY_TESTS --lazy-pages $LAZY_EXCLUDE $ZDTM_OPTS -./test/zdtm.py run -p 2 -T $LAZY_TESTS --remote-lazy-pages $LAZY_EXCLUDE $ZDTM_OPTS -./test/zdtm.py run -p 2 -T $LAZY_TESTS --remote-lazy-pages --tls $LAZY_EXCLUDE $ZDTM_OPTS +./test/zdtm.py run $LAZY_OPTS --lazy-pages +./test/zdtm.py run $LAZY_OPTS --remote-lazy-pages +./test/zdtm.py run $LAZY_OPTS --remote-lazy-pages --tls bash ./test/jenkins/criu-fault.sh bash ./test/jenkins/criu-fcg.sh From f5c8c9e62a93d1b71dcd3f23960e31197f3e3115 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Sun, 17 Nov 2019 16:05:47 +0200 Subject: [PATCH 139/277] travis: exclude uns tests for lazy-pages on newer kernels Kernels 5.4 and
higher will restrict availability of UFFD_EVENT_FORK only for users with CAP_SYS_PTRACE. This prevents running --lazy-pages tests with 'uns' flavor. Disable 'uns' for lazy pages testing in travis for newer kernels. Signed-off-by: Mike Rapoport --- scripts/travis/travis-tests | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/scripts/travis/travis-tests b/scripts/travis/travis-tests index a87ddbaf46..4cb842c973 100755 --- a/scripts/travis/travis-tests +++ b/scripts/travis/travis-tests @@ -145,8 +145,15 @@ else fi LAZY_EXCLUDE="$LAZY_EXCLUDE -x maps04" +# Starting with 5.4 kernel requires CAP_SYS_PTRACE to use uffd events; as such +# we cannot run lazy-pages tests in uns +LAZY_FLAVORS="" +if [ $KERN_MAJ -ge "5" ] && [ $KERN_MIN -ge "4" ]; then + LAZY_FLAVORS = "-f h,ns" +fi + LAZY_TESTS=.*\(maps0\|uffd-events\|lazy-thp\|futex\|fork\).* -LAZY_OPTS="-p 2 -T $LAZY_TESTS $LAZY_EXCLUDE $ZDTM_OPTS" +LAZY_OPTS="-p 2 -T $LAZY_TESTS $LAZY_EXCLUDE $LAZY_FLAVORS $ZDTM_OPTS" ./test/zdtm.py run $LAZY_OPTS --lazy-pages ./test/zdtm.py run $LAZY_OPTS --remote-lazy-pages From 4c1ff11d1b2f54162fcaaec602db68876fa22f0d Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 3 Nov 2019 20:18:38 +0000 Subject: [PATCH 140/277] mount: Add error messages Suggested-by: Andrei Vagin Signed-off-by: Radostin Stoyanov --- criu/mount.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/criu/mount.c b/criu/mount.c index 974af6eb22..6b1adecc65 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -1325,8 +1325,10 @@ int ns_open_mountpoint(void *arg) } /* Remount all mounts as private to disable propagation */ - if (mount("none", "/", NULL, MS_REC|MS_PRIVATE, NULL)) + if (mount("none", "/", NULL, MS_REC|MS_PRIVATE, NULL)) { + pr_perror("Unable to remount"); goto err; + } if (umount_overmounts(mi)) goto err; @@ -1536,6 +1538,7 @@ static __maybe_unused int mount_cr_time_mount(struct ns_id *ns, unsigned int *s_ ret = mount(source, target, type, 0,
NULL); if (ret < 0) { + pr_perror("Unable to mount %s %s", source, target); exit_code = -errno; goto restore_ns; } else { @@ -2004,7 +2007,10 @@ static int fetch_rt_stat(struct mount_info *m, const char *where) static int do_simple_mount(struct mount_info *mi, const char *src, const char *fstype, unsigned long mountflags) { - return mount(src, mi->mountpoint, fstype, mountflags, mi->options); + int ret = mount(src, mi->mountpoint, fstype, mountflags, mi->options); + if (ret) + pr_perror("Unable to mount %s %s (id=%d)", src, mi->mountpoint, mi->mnt_id); + return ret; } static char *mnt_fsname(struct mount_info *mi) @@ -2491,8 +2497,11 @@ static int do_mount_one(struct mount_info *mi) } /* do_mount_root() is called from populate_mnt_ns() */ - if (mount(opts.root, mi->mountpoint, NULL, MS_BIND | MS_REC, NULL)) + if (mount(opts.root, mi->mountpoint, NULL, MS_BIND | MS_REC, NULL)) { + pr_perror("Unable to mount %s %s (id=%d)", opts.root, mi->mountpoint, mi->mnt_id); return -1; + } + if (do_mount_root(mi)) return -1; mi->mounted = true; From 1b31164dff1c06230aaf38eff79e37a88a7cef69 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sat, 9 Nov 2019 22:48:32 +0000 Subject: [PATCH 141/277] mount: Bind-mount root via userns_call When restoring a runc container with enabled user namespace CRIU fails to mount the specified root directory because the path is under /run/runc which is inaccessible to unprivileged users. 
Signed-off-by: Radostin Stoyanov --- criu/mount.c | 51 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 12 deletions(-) diff --git a/criu/mount.c b/criu/mount.c index 6b1adecc65..52e70d3767 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -2020,20 +2020,20 @@ static char *mnt_fsname(struct mount_info *mi) return mi->fstype->name; } -static int apply_sb_flags(void *args, int fd, pid_t pid) +static int userns_mount(char *src, void *args, int fd, pid_t pid) { unsigned long flags = *(unsigned long *) args; int rst = -1, err = -1; - char path[PSFDS]; + char target[PSFDS]; - snprintf(path, sizeof(path), "/proc/self/fd/%d", fd); + snprintf(target, sizeof(target), "/proc/self/fd/%d", fd); if (pid != getpid() && switch_ns(pid, &mnt_ns_desc, &rst)) return -1; - err = mount(NULL, path, NULL, MS_REMOUNT | flags, NULL); + err = mount(src, target, NULL, flags, NULL); if (err) - pr_perror("Unable to remount %s", path); + pr_perror("Unable to mount %s", target); if (rst >= 0 && restore_ns(rst, &mnt_ns_desc)) return -1; @@ -2041,6 +2041,16 @@ static int apply_sb_flags(void *args, int fd, pid_t pid) return err; } +static int apply_sb_flags(void *args, int fd, pid_t pid) +{ + return userns_mount(NULL, args, fd, pid); +} + +static int mount_root(void *args, int fd, pid_t pid) +{ + return userns_mount(opts.root, args, fd, pid); +} + static int do_new_mount(struct mount_info *mi) { unsigned long sflags = mi->sb_flags; @@ -2088,10 +2098,9 @@ static int do_new_mount(struct mount_info *mi) pr_perror("Unable to open %s", mi->mountpoint); return -1; } - sflags |= MS_RDONLY; - if (userns_call(apply_sb_flags, 0, - &sflags, sizeof(sflags), fd)) { - pr_perror("Unable to apply mount flags %d for %s", + sflags |= MS_RDONLY | MS_REMOUNT; + if (userns_call(apply_sb_flags, 0, &sflags, sizeof(sflags), fd)) { + pr_err("Unable to apply mount flags %d for %s", mi->sb_flags, mi->mountpoint); close(fd); return -1; @@ -2491,15 +2500,33 @@ static int do_mount_one(struct 
mount_info *mi) pr_debug("\tMounting %s @%s (%d)\n", mi->fstype->name, mi->mountpoint, mi->need_plugin); if (rst_mnt_is_root(mi)) { + int fd; + unsigned long flags = MS_BIND | MS_REC; + if (opts.root == NULL) { pr_err("The --root option is required to restore a mount namespace\n"); return -1; } /* do_mount_root() is called from populate_mnt_ns() */ - if (mount(opts.root, mi->mountpoint, NULL, MS_BIND | MS_REC, NULL)) { - pr_perror("Unable to mount %s %s (id=%d)", opts.root, mi->mountpoint, mi->mnt_id); - return -1; + if (root_ns_mask & CLONE_NEWUSER) { + fd = open(mi->mountpoint, O_PATH); + if (fd < 0) { + pr_perror("Unable to open %s", mi->mountpoint); + return -1; + } + + if (userns_call(mount_root, 0, &flags, sizeof(flags), fd)) { + pr_err("Unable to mount %s\n", mi->mountpoint); + close(fd); + return -1; + } + close(fd); + } else { + if (mount(opts.root, mi->mountpoint, NULL, flags, NULL)) { + pr_perror("Unable to mount %s %s (id=%d)", opts.root, mi->mountpoint, mi->mnt_id); + return -1; + } } if (do_mount_root(mi)) From 91884a14cb346f8f0c18afe4ce237afa112d3083 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 10 Nov 2019 07:35:50 +0000 Subject: [PATCH 142/277] restore: Create temp proc in /tmp When restoring a container with user namespace, CRIU fails to create a temporary directory for proc. This is because the unprivileged user that has been just restored does not have permissions to access the working directory used by CRIU.
Resolves #828 Signed-off-by: Radostin Stoyanov --- criu/cr-restore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 953f28e458..fad1b38795 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -1589,7 +1589,7 @@ static void restore_pgid(void) static int mount_proc(void) { int fd, ret; - char proc_mountpoint[] = "crtools-proc.XXXXXX"; + char proc_mountpoint[] = "/tmp/crtools-proc.XXXXXX"; if (root_ns_mask == 0) fd = ret = open("/proc", O_DIRECTORY); From 6f2301015cd880a5b671e49a8740f719eac709dd Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 3 Nov 2019 20:35:18 +0000 Subject: [PATCH 143/277] man: Describe --root option requirements These requirements have been described in https://github.com/opencontainers/runc/blob/b133feae/libcontainer/container_linux.go#L1265 Signed-off-by: Radostin Stoyanov --- Documentation/criu.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/criu.txt b/Documentation/criu.txt index 2729bc95a3..133a094c0f 100644 --- a/Documentation/criu.txt +++ b/Documentation/criu.txt @@ -414,6 +414,8 @@ usually need to be escaped from shell. *-r*, *--root* 'path':: Change the root filesystem to 'path' (when run in a mount namespace). + This option is required to restore a mount namespace. The directory + 'path' must be a mount point and its parent must not be overmounted. *--external* 'type'*[*'id'*]:*'value':: Restore an instance of an external resource. The generic syntax is From 49a0fb04db603932cdd299b5c2ac5316a2e14b66 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Tue, 19 Nov 2019 22:10:39 +0000 Subject: [PATCH 144/277] zdtm: Replace if->continue with if->elif->else Replacing the if->continue pattern with if->elif->else reduces the number of lines while preserving the logic. 
Signed-off-by: Radostin Stoyanov --- test/zdtm.py | 63 ++++++++++++++++++---------------------------------- 1 file changed, 22 insertions(+), 41 deletions(-) diff --git a/test/zdtm.py b/test/zdtm.py index 25328104ee..c7e6f4ddfc 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -867,76 +867,57 @@ class criu_rpc: def __set_opts(criu, args, ctx): while len(args) != 0: arg = args.pop(0) - if arg == '-v4': + if "-v4" == arg: criu.opts.log_level = 4 - continue - if arg == '-o': + elif "-o" == arg: criu.opts.log_file = args.pop(0) - continue - if arg == '-D': + elif "-D" == arg: criu.opts.images_dir_fd = os.open(args.pop(0), os.O_DIRECTORY) ctx['imgd'] = criu.opts.images_dir_fd - continue - if arg == '-t': + elif "-t" == arg: criu.opts.pid = int(args.pop(0)) - continue - if arg == '--pidfile': + elif "--pidfile" == arg: ctx['pidf'] = args.pop(0) - continue - if arg == '--timeout': + elif "--timeout" == arg: criu.opts.timeout = int(args.pop(0)) - continue - if arg == '--restore-detached': - # Set by service by default - ctx['rd'] = True - continue - if arg == '--root': + elif "--restore-detached" == arg: + ctx['rd'] = True # Set by service by default + elif "--root" == arg: criu.opts.root = args.pop(0) - continue - if arg == '--external': + elif "--external" == arg: criu.opts.external.append(args.pop(0)) - continue - if arg == '--status-fd': + elif "--status-fd" == arg: fd = int(args.pop(0)) os.write(fd, b"\0") fcntl.fcntl(fd, fcntl.F_SETFD, fcntl.FD_CLOEXEC) - continue - if arg == '--port': + elif "--port" == arg: criu.opts.ps.port = int(args.pop(0)) - continue - if arg == '--address': + elif "--address" == arg: criu.opts.ps.address = args.pop(0) + elif "--page-server" == arg: continue - if arg == '--page-server': - continue - if arg == '--prev-images-dir': + elif "--prev-images-dir" == arg: criu.opts.parent_img = args.pop(0) - continue - if arg == '--pre-dump-mode': + elif "--pre-dump-mode" == arg: key = args.pop(0) mode = crpc.rpc.VM_READ if key == "splice": mode = 
crpc.rpc.SPLICE criu.opts.pre_dump_mode = mode - continue - if arg == '--track-mem': + elif "--track-mem" == arg: criu.opts.track_mem = True - continue - if arg == '--tcp-established': + elif "--tcp-established" == arg: criu.opts.tcp_established = True - continue - if arg == '--restore-sibling': + elif "--restore-sibling" == arg: criu.opts.rst_sibling = True - continue - if arg == "--inherit-fd": + elif "--inherit-fd" == arg: inhfd = criu.opts.inherit_fd.add() key = args.pop(0) fd, key = key.split(":", 1) inhfd.fd = int(fd[3:-1]) inhfd.key = key - continue - - raise test_fail_exc('RPC for %s(%s) required' % (arg, args.pop(0))) + else: + raise test_fail_exc('RPC for %s(%s) required' % (arg, args.pop(0))) @staticmethod def run(action, From 7b970be59bfe35fa883495898a15e4f2d9936b7c Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 19 Nov 2019 22:48:44 +0000 Subject: [PATCH 145/277] zdtm: Set --root path to 0700 on restore Update zdtm tests to verify that CRIU does not require the --root path to be accessible to the unprivileged user being restored when restoring user namespace. 
Signed-off-by: Radostin Stoyanov --- test/zdtm.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/test/zdtm.py b/test/zdtm.py index c7e6f4ddfc..de6b376884 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -62,6 +62,7 @@ def traceit(f, e, a): def clean_tests_root(): global tests_root if tests_root and tests_root[0] == os.getpid(): + os.rmdir(os.path.join(tests_root[1], "root")) os.rmdir(tests_root[1]) @@ -70,7 +71,9 @@ def make_tests_root(): if not tests_root: tests_root = (os.getpid(), tempfile.mkdtemp("", "criu-root-", "/tmp")) atexit.register(clean_tests_root) - return tests_root[1] + os.mkdir(os.path.join(tests_root[1], "root")) + os.chmod(tests_root[1], 0o777) + return os.path.join(tests_root[1], "root") # Report generation @@ -483,6 +486,13 @@ def start(self): # move into some semi-random state time.sleep(random.random()) + if self.__flavor.ns: + # In the case of runc the path specified with the opts.root + # option is created in /run/runc/ which is inaccessible to + # unprivileged users. The permissions here are set to test + # this use case. + os.chmod(os.path.dirname(self.__flavor.root), 0o700) + def kill(self, sig=signal.SIGKILL): self.__freezer.thaw() if self.__pid: From e7baaf11eda6dca8f388fc0dedbf5b7f871f6f46 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 21 Nov 2019 21:56:34 +0000 Subject: [PATCH 146/277] build: Remove SRCARCH SRCARCH is always equal ARCH. There are no rules when to use one or another and architectures may forget to set one of them up. No need for a second variable meaning the same and confusing people. Remove it completely. Self-correction [after some debug]: SRCARCH was different in one place: zdtm Makefile by some unintentional mistake: > ifeq ($(ARCH),arm64) > ARCH ?= aarch64 > SRCARCH ?= aarch64 > endif That meant to be "ARCH := aarch64" because "?=" would never work inside that ifeq. Fix up this part of mess too. 
Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- Makefile | 3 +-- Makefile.config | 2 +- compel/plugins/Makefile | 4 ++-- criu/Makefile | 2 +- criu/pie/Makefile | 6 +++--- criu/pie/Makefile.library | 4 ++-- test/zdtm/Makefile.inc | 7 ++----- test/zdtm/static/Makefile | 4 ++-- 8 files changed, 14 insertions(+), 18 deletions(-) diff --git a/Makefile b/Makefile index f827e7baa7..2e62f6f39e 100644 --- a/Makefile +++ b/Makefile @@ -86,7 +86,6 @@ endif # commit "S/390: Fix 64 bit sibcall". ifeq ($(ARCH),s390) ARCH := s390 - SRCARCH := s390 DEFINES := -DCONFIG_S390 CFLAGS_PIE := -fno-optimize-sibling-calls endif @@ -94,7 +93,7 @@ endif CFLAGS_PIE += -DCR_NOGLIBC export CFLAGS_PIE -LDARCH ?= $(SRCARCH) +LDARCH ?= $(ARCH) export LDARCH export PROTOUFIX DEFINES diff --git a/Makefile.config b/Makefile.config index 1e4352b9de..5af3fed389 100644 --- a/Makefile.config +++ b/Makefile.config @@ -30,7 +30,7 @@ CONFIG_FILE = .config $(CONFIG_FILE): touch $(CONFIG_FILE) -ifeq ($(SRCARCH),x86) +ifeq ($(ARCH),x86) # CONFIG_COMPAT is only for x86 now, no need for compile-test other archs ifeq ($(call try-asm,$(FEATURE_TEST_X86_COMPAT)),true) export CONFIG_COMPAT := y diff --git a/compel/plugins/Makefile b/compel/plugins/Makefile index a326e2a661..197ff1b24e 100644 --- a/compel/plugins/Makefile +++ b/compel/plugins/Makefile @@ -53,11 +53,11 @@ std-lib-y += ./$(PLUGIN_ARCH_DIR)/std/parasite-head.o target += fds fds-lib-y += fds/fds.o -ifeq ($(SRCARCH),x86) +ifeq ($(ARCH),x86) std-lib-y += ./$(PLUGIN_ARCH_DIR)/std/memcpy.o endif -ifeq ($(SRCARCH),ppc64) +ifeq ($(ARCH),ppc64) std-lib-y += ./$(PLUGIN_ARCH_DIR)/std/memcpy.o std-lib-y += ./$(PLUGIN_ARCH_DIR)/std/memcmp.o endif diff --git a/criu/Makefile b/criu/Makefile index 4134e5052e..ceb49ce099 100644 --- a/criu/Makefile +++ b/criu/Makefile @@ -2,7 +2,7 @@ # 6a8d90f5fec4 "attr: Allow attribute type 0" WRAPFLAGS += -Wl,--wrap=nla_parse,--wrap=nlmsg_parse -ARCH_DIR := criu/arch/$(SRCARCH) +ARCH_DIR := criu/arch/$(ARCH) PIE_DIR := 
criu/pie export ARCH_DIR PIE_DIR diff --git a/criu/pie/Makefile b/criu/pie/Makefile index 1ad456f430..a30747ac30 100644 --- a/criu/pie/Makefile +++ b/criu/pie/Makefile @@ -14,7 +14,7 @@ ifneq ($(filter-out clean mrproper,$(MAKECMDGOALS)),) compel_plugins := $(shell $(COMPEL_BIN) plugins) endif -LDS := compel/arch/$(SRCARCH)/scripts/compel-pack.lds.S +LDS := compel/arch/$(ARCH)/scripts/compel-pack.lds.S restorer-obj-y += parasite-vdso.o ./$(ARCH_DIR)/vdso-pie.o restorer-obj-y += ./$(ARCH_DIR)/restorer.o @@ -26,11 +26,11 @@ ifeq ($(ARCH),x86) endif endif -ifeq ($(SRCARCH),aarch64) +ifeq ($(ARCH),aarch64) restorer-obj-y += ./$(ARCH_DIR)/intraprocedure.o endif -ifeq ($(SRCARCH),ppc64) +ifeq ($(ARCH),ppc64) restorer-obj-y += ./$(ARCH_DIR)/vdso-trampoline.o endif diff --git a/criu/pie/Makefile.library b/criu/pie/Makefile.library index 658c8a4eb8..de75b11d46 100644 --- a/criu/pie/Makefile.library +++ b/criu/pie/Makefile.library @@ -9,14 +9,14 @@ lib-name := pie.lib.a lib-y += util.o lib-y += util-vdso.o -ifeq ($(SRCARCH),x86) +ifeq ($(ARCH),x86) ifeq ($(CONFIG_COMPAT),y) lib-y += util-vdso-elf32.o endif CFLAGS_util-vdso-elf32.o += -DCONFIG_VDSO_32 endif -ifeq ($(SRCARCH),arm) +ifeq ($(ARCH),arm) lib-y += ./$(ARCH_DIR)/aeabi-helpers.o lib-y += ./$(ARCH_DIR)/pie-cacheflush.o endif diff --git a/test/zdtm/Makefile.inc b/test/zdtm/Makefile.inc index 170f31632e..d5c013a3e8 100644 --- a/test/zdtm/Makefile.inc +++ b/test/zdtm/Makefile.inc @@ -15,12 +15,9 @@ ARCH ?= $(shell uname -m | sed \ -e s/aarch64.*/arm64/) ifeq ($(ARCH),arm64) - ARCH ?= aarch64 - SRCARCH ?= aarch64 + ARCH := aarch64 endif -SRCARCH ?= $(ARCH) - ifeq ($(ARCH),arm) ARMV := $(shell echo $(UNAME-M) | sed -nr 's/armv([[:digit:]]).*/\1/p; t; i7') @@ -35,7 +32,7 @@ CC := gcc CFLAGS += -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0 CFLAGS += $(USERCFLAGS) CFLAGS += -D_GNU_SOURCE -CPPFLAGS += -iquote $(LIBDIR)/arch/$(SRCARCH)/include +CPPFLAGS += -iquote $(LIBDIR)/arch/$(ARCH)/include ifeq ($(strip 
$(V)),) E = @echo diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index a38482f44e..e0d4d2c5cb 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -219,13 +219,13 @@ TST_NOFILE := \ child_subreaper_and_reparent \ # jobctl00 \ -ifneq ($(SRCARCH),arm) +ifneq ($(ARCH),arm) ifneq ($(COMPAT_TEST),y) TST_NOFILE += maps03 endif endif -ifeq ($(SRCARCH),s390) +ifeq ($(ARCH),s390) TST_NOFILE += s390x_regs_check \ s390x_gs_threads \ s390x_runtime_instr From 70a5b0c84bbddb3f917a4bc74bb53622b4dda902 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 21 Nov 2019 21:56:35 +0000 Subject: [PATCH 147/277] build/nmk: Remove SRCARCH It's not used anywhere now. Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- scripts/nmk/scripts/include.mk | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/nmk/scripts/include.mk b/scripts/nmk/scripts/include.mk index e1701103f9..ee0e32f62b 100644 --- a/scripts/nmk/scripts/include.mk +++ b/scripts/nmk/scripts/include.mk @@ -22,9 +22,8 @@ SUBARCH := $(shell uname -m | sed \ -e s/aarch64.*/aarch64/) ARCH ?= $(SUBARCH) -SRCARCH := $(ARCH) -export SUBARCH ARCH SRCARCH +export SUBARCH ARCH ifndef ____nmk_defined__tools include $(__nmk_dir)tools.mk From da58a1085b43a9fe6f1d9d9349e0b0adcf1b4c4c Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 21 Nov 2019 21:56:36 +0000 Subject: [PATCH 148/277] build/nmk: Provide proper SUBARCH It's always equal ARCH and not very useful (so nothing actually uses it). Time for a change: SUBARCH now is meaningful and gives a way to detect what kind of ARCH flavor build is dealing with. Also, for cross-compiling sake don't set SUBARCH if the user supplied it. 
(and don't call useless uname during cross compilation) Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- scripts/nmk/scripts/include.mk | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/scripts/nmk/scripts/include.mk b/scripts/nmk/scripts/include.mk index ee0e32f62b..c1c1e94af4 100644 --- a/scripts/nmk/scripts/include.mk +++ b/scripts/nmk/scripts/include.mk @@ -8,21 +8,20 @@ endif # # Common vars. -SUBARCH := $(shell uname -m | sed \ - -e s/i.86/x86/ \ - -e s/x86_64/x86/ \ - -e s/sun4u/sparc64/ \ - -e s/arm.*/arm/ \ - -e s/sa110/arm/ \ - -e s/s390x/s390/ \ - -e s/parisc64/parisc/ \ - -e s/ppc64.*/ppc64/ \ - -e s/mips.*/mips/ \ - -e s/sh[234].*/sh/ \ +SUBARCH ?= $(shell uname -m) +ARCH ?= $(shell echo $(SUBARCH) | sed \ + -e s/i.86/x86/ \ + -e s/x86_64/x86/ \ + -e s/sun4u/sparc64/ \ + -e s/arm.*/arm/ \ + -e s/sa110/arm/ \ + -e s/s390x/s390/ \ + -e s/parisc64/parisc/ \ + -e s/ppc64.*/ppc64/ \ + -e s/mips.*/mips/ \ + -e s/sh[234].*/sh/ \ -e s/aarch64.*/aarch64/) -ARCH ?= $(SUBARCH) - export SUBARCH ARCH ifndef ____nmk_defined__tools From 1f09aba83e8b13ec2cd4f0fe7a7777cc8545e038 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 21 Nov 2019 21:56:37 +0000 Subject: [PATCH 149/277] build: Use SUBARCH Instead of doing additional `uname -m` - use provided $(SUBARCH) to detect what architecture flavour the build should produce the result for. 
Fixes two things: - zdtm make now correctly supplies $(USERCFLAGS) - subtly fixes cross compilation by providing a way to specify $(SUBARCH) Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- Makefile | 6 ++---- test/zdtm/Makefile.inc | 25 +++++++++++++------------ 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/Makefile b/Makefile index 2e62f6f39e..ef76d706c2 100644 --- a/Makefile +++ b/Makefile @@ -17,8 +17,6 @@ ifeq ($(origin HOSTCFLAGS), undefined) HOSTCFLAGS := $(CFLAGS) $(USERCFLAGS) endif -UNAME-M := $(shell uname -m) - # # Supported Architectures ifneq ($(filter-out x86 arm aarch64 ppc64 s390,$(ARCH)),) @@ -27,14 +25,14 @@ endif # The PowerPC 64 bits architecture could be big or little endian. # They are handled in the same way. -ifeq ($(UNAME-M),ppc64) +ifeq ($(SUBARCH),ppc64) error := $(error ppc64 big endian is not yet supported) endif # # Architecture specific options. ifeq ($(ARCH),arm) - ARMV := $(shell echo $(UNAME-M) | sed -nr 's/armv([[:digit:]]).*/\1/p; t; i7') + ARMV := $(shell echo $(SUBARCH) | sed -nr 's/armv([[:digit:]]).*/\1/p; t; i7') ifeq ($(ARMV),6) USERCFLAGS += -march=armv6 diff --git a/test/zdtm/Makefile.inc b/test/zdtm/Makefile.inc index d5c013a3e8..7584d3b060 100644 --- a/test/zdtm/Makefile.inc +++ b/test/zdtm/Makefile.inc @@ -1,17 +1,18 @@ .SUFFIXES: MAKEFLAGS += -r -ARCH ?= $(shell uname -m | sed \ - -e s/i.86/x86/ \ - -e s/x86_64/x86/ \ - -e s/sun4u/sparc64/ \ - -e s/arm.*/arm/ \ - -e s/sa110/arm/ \ - -e s/s390x/s390/ \ - -e s/parisc64/parisc/ \ - -e s/ppc64.*/ppc64/ \ - -e s/mips.*/mips/ \ - -e s/sh[234].*/sh/ \ +SUBARCH ?= $(shell uname -m) +ARCH ?= $(shell echo $(SUBARCH) | sed \ + -e s/i.86/x86/ \ + -e s/x86_64/x86/ \ + -e s/sun4u/sparc64/ \ + -e s/arm.*/arm/ \ + -e s/sa110/arm/ \ + -e s/s390x/s390/ \ + -e s/parisc64/parisc/ \ + -e s/ppc64.*/ppc64/ \ + -e s/mips.*/mips/ \ + -e s/sh[234].*/sh/ \ -e s/aarch64.*/arm64/) ifeq ($(ARCH),arm64) @@ -19,7 +20,7 @@ ifeq ($(ARCH),arm64) endif ifeq ($(ARCH),arm) 
- ARMV := $(shell echo $(UNAME-M) | sed -nr 's/armv([[:digit:]]).*/\1/p; t; i7') + ARMV := $(shell echo $(SUBARCH) | sed -nr 's/armv([[:digit:]]).*/\1/p; t; i7') ifeq ($(ARMV),6) USERCFLAGS += -march=armv6 From 37eb8bf24d936944cc351a5bedb50a1917254410 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 21 Nov 2019 21:56:38 +0000 Subject: [PATCH 150/277] build/zdtm: Support cross-build Maybe not that useful, but only little change needed. Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- test/zdtm/Makefile.inc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/zdtm/Makefile.inc b/test/zdtm/Makefile.inc index 7584d3b060..8f2650b440 100644 --- a/test/zdtm/Makefile.inc +++ b/test/zdtm/Makefile.inc @@ -29,7 +29,10 @@ ifeq ($(ARCH),arm) endif endif -CC := gcc +HOSTCC ?= gcc +ifeq ($(origin CC), default) + CC := $(CROSS_COMPILE)$(HOSTCC) +endif CFLAGS += -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0 CFLAGS += $(USERCFLAGS) CFLAGS += -D_GNU_SOURCE From f57727043ced138dc9049ffaa5e73c0eb36194df Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 21 Nov 2019 21:56:39 +0000 Subject: [PATCH 151/277] build/zdtm: Makefile hack for travis aarch64/armv8l The very same hack to build aarch32 zdtm tests on armv8 Travis-CI as in the commit dfa0a1edcbcb ("Makefile hack for travis aarch64/armv8l") Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- test/zdtm/Makefile.inc | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/test/zdtm/Makefile.inc b/test/zdtm/Makefile.inc index 8f2650b440..d132ca9817 100644 --- a/test/zdtm/Makefile.inc +++ b/test/zdtm/Makefile.inc @@ -20,13 +20,17 @@ ifeq ($(ARCH),arm64) endif ifeq ($(ARCH),arm) - ARMV := $(shell echo $(SUBARCH) | sed -nr 's/armv([[:digit:]]).*/\1/p; t; i7') - - ifeq ($(ARMV),6) - USERCFLAGS += -march=armv6 - else ifeq ($(ARMV),7) - USERCFLAGS += -march=armv7-a - endif + ARMV := $(shell echo $(SUBARCH) | sed -nr 
's/armv([[:digit:]]).*/\1/p; t; i7') + + ifeq ($(ARMV),6) + USERCFLAGS += -march=armv6 + else ifeq ($(ARMV),7) + USERCFLAGS += -march=armv7-a + else ifeq ($(ARMV),8) + # To build aarch32 on armv8 Travis-CI (see criu Makefile) + USERCFLAGS += -march=armv7-a + ARMV := 7 + endif endif HOSTCC ?= gcc From 3e09b0b1d3b03f2af98705f94dd579a92d984f43 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 21 Nov 2019 21:56:40 +0000 Subject: [PATCH 152/277] build/zdtm: Use pkg-config to find includes/libs Helps to cross-compile zdtm tests in case somebody needs it. Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- test/zdtm/Makefile.inc | 13 +++++++++++++ test/zdtm/static/Makefile | 4 ++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/test/zdtm/Makefile.inc b/test/zdtm/Makefile.inc index d132ca9817..32fc72d320 100644 --- a/test/zdtm/Makefile.inc +++ b/test/zdtm/Makefile.inc @@ -53,12 +53,25 @@ endif RM := rm -f --one-file-system ifeq ($(COMPAT_TEST),y) + # Firstly look for 32-bit libs and then in standard path. 
+ PKG_CONFIG_PATH := $(shell pkg-config --variable pc_path pkg-config) + PKG_CONFIG_PATH := /usr/lib32/pkgconfig:$(PKG_CONFIG_PATH) ifeq ($(ARCH),x86) export CFLAGS += -m32 export LDFLAGS += -m32 + PKG_CONFIG_PATH := /usr/lib/i386-linux-gnu/pkgconfig:$(PKG_CONFIG_PATH) endif + export PKG_CONFIG_PATH endif +define pkg-libs + $(shell PKG_CONFIG_PATH="$(PKG_CONFIG_PATH)" pkg-config --libs $(1)) +endef + +define pkg-cflags + $(shell PKG_CONFIG_PATH="$(PKG_CONFIG_PATH)" pkg-config --cflags $(1)) +endef + %.d: %.c $(E) " DEP " $@ $(Q)$(CC) $(CFLAGS) $(CPPFLAGS) -MM -MP -c $< -o $@ diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index e0d4d2c5cb..36d00ca5c8 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -528,8 +528,8 @@ stopped12: CFLAGS += -DZDTM_STOPPED_KILL -DZDTM_STOPPED_TKILL clone_fs: LDLIBS += -pthread # As generating dependencies won't work without proper includes, # we have to explicitly specify both .o and .d for this case: -netns_sub_veth.o netns_sub_veth.d: CPPFLAGS += -I/usr/include/libnl3 -netns_sub_veth: LDLIBS += -lnl-3 -l nl-route-3 +netns_sub_veth.o netns_sub_veth.d: CPPFLAGS += $(call pkg-cflags, libnl-3.0) +netns_sub_veth: LDLIBS += $(call pkg-libs, libnl-route-3.0 libnl-3.0) socket-tcp-fin-wait1: CFLAGS += -D ZDTM_TCP_FIN_WAIT1 socket-tcp-fin-wait2: CFLAGS += -D ZDTM_TCP_FIN_WAIT2 From c3e1157526f67054785d4f76185ac43d5250e7fd Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 21 Nov 2019 21:56:41 +0000 Subject: [PATCH 153/277] travis: Add armv7-cross as cross-compile test Fixes: #455 Based-on-patch-by: Andrei Vagin Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- .travis.yml | 4 +++ scripts/build/Dockerfile.armv7-cross | 44 ++++++++++++++++++++++++++++ scripts/build/Makefile | 1 + 3 files changed, 49 insertions(+) create mode 100644 scripts/build/Dockerfile.armv7-cross diff --git a/.travis.yml b/.travis.yml index f6f71be48c..b27dbfe7b0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -73,6 
+73,10 @@ matrix: arch: amd64 env: TR_ARCH=fedora-asan dist: xenial # test hangs on bionic + - os: linux + arch: amd64 + env: TR_ARCH=armv7-cross + dist: bionic allow_failures: - env: TR_ARCH=docker-test - env: TR_ARCH=fedora-rawhide diff --git a/scripts/build/Dockerfile.armv7-cross b/scripts/build/Dockerfile.armv7-cross new file mode 100644 index 0000000000..434934aad1 --- /dev/null +++ b/scripts/build/Dockerfile.armv7-cross @@ -0,0 +1,44 @@ +FROM dockcross/base:latest + +# Add the cross compiler sources +RUN echo "deb http://ftp.us.debian.org/debian/ jessie main" >> /etc/apt/sources.list && \ + dpkg --add-architecture armhf && \ + apt-get install emdebian-archive-keyring + +RUN apt-get update && apt-get install -y \ + crossbuild-essential-armhf \ + libbz2-dev:armhf \ + libexpat1-dev:armhf \ + ncurses-dev:armhf \ + libssl-dev:armhf \ + protobuf-c-compiler \ + protobuf-compiler \ + python-protobuf \ + libnl-3-dev:armhf \ + libprotobuf-dev:armhf \ + libnet-dev:armhf \ + libprotobuf-c-dev:armhf \ + libcap-dev:armhf \ + libaio-dev:armhf \ + libnl-route-3-dev:armhf + +ENV CROSS_TRIPLE=arm-linux-gnueabihf +ENV CROSS_COMPILE=${CROSS_TRIPLE}- \ + CROSS_ROOT=/usr/${CROSS_TRIPLE} \ + AS=/usr/bin/${CROSS_TRIPLE}-as \ + AR=/usr/bin/${CROSS_TRIPLE}-ar \ + CC=/usr/bin/${CROSS_TRIPLE}-gcc \ + CPP=/usr/bin/${CROSS_TRIPLE}-cpp \ + CXX=/usr/bin/${CROSS_TRIPLE}-g++ \ + LD=/usr/bin/${CROSS_TRIPLE}-ld \ + FC=/usr/bin/${CROSS_TRIPLE}-gfortran + +ENV PATH="${PATH}:${CROSS_ROOT}/bin" \ + PKG_CONFIG_PATH=/usr/lib/${CROSS_TRIPLE}/pkgconfig \ + ARCH=arm \ + SUBARCH=armv7 + +COPY . 
/criu +WORKDIR /criu + +RUN make mrproper && date && make -j $(nproc) zdtm && date diff --git a/scripts/build/Makefile b/scripts/build/Makefile index a7c78e8bd4..d093ce76c9 100644 --- a/scripts/build/Makefile +++ b/scripts/build/Makefile @@ -2,6 +2,7 @@ ARCHES := x86_64 fedora-asan fedora-rawhide centos armv7hf TARGETS := $(ARCHES) alpine TARGETS_CLANG := $(addsuffix $(TARGETS),-clang) CONTAINER_RUNTIME := docker +TARGETS += armv7-cross all: $(TARGETS) $(TARGETS_CLANG) .PHONY: all From 53018afa606d6ac07e0731ac06873c981309d93a Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 21 Nov 2019 21:56:42 +0000 Subject: [PATCH 154/277] Documentation: Add a hint about docker build The original/old guide probably doesn't work anymore: - the patch isn't accessible; - criu now depends on more libraries not only protobuf Still, keep it as it might be helpful for someone. Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- Documentation/HOWTO.cross-compile | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Documentation/HOWTO.cross-compile b/Documentation/HOWTO.cross-compile index f1b17842b8..44b19dfea8 100644 --- a/Documentation/HOWTO.cross-compile +++ b/Documentation/HOWTO.cross-compile @@ -1,4 +1,10 @@ -This HOWTO explains how to cross-compile CRIU on x86 +How to cross-compile CRIU on x86: + +Use the Dockerfile provided: + scripts/build/Dockerfile.armv7-cross + +Historical guide how-to do it without docker container: +[Unsupported, may not work anymore!] 1. Download the protobuf sources. 2. 
Apply the patch http://16918.selcdn.ru/crtools/aarch64/0001-protobuf-added-the-support-for-the-acrchitecture-AAr.patch From 455e8f4b0de6d72f86f3eac989c9829425388bd0 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 21 Nov 2019 21:56:43 +0000 Subject: [PATCH 155/277] zdtm/socket-tcp-fin-wait1: Use array index for TEST_MSG Fixes the following compile-error: > CC socket-tcp-fin-wait1.o > socket-tcp-fin-wait1.c:144:26: error: adding 'int' to a string does not append to the string [-Werror,-Wstring-plus-int] > if (write(fd, TEST_MSG + 2, sizeof(TEST_MSG) - 2) != sizeof(TEST_MSG) - 2) { > ~~~~~~~~~^~~ > socket-tcp-fin-wait1.c:144:26: note: use array indexing to silence this warning > if (write(fd, TEST_MSG + 2, sizeof(TEST_MSG) - 2) != sizeof(TEST_MSG) - 2) { > ^ > & [ ] > 1 error generated. Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- test/zdtm/static/socket-tcp-fin-wait1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/zdtm/static/socket-tcp-fin-wait1.c b/test/zdtm/static/socket-tcp-fin-wait1.c index 6c7cc93e56..50da9c1528 100644 --- a/test/zdtm/static/socket-tcp-fin-wait1.c +++ b/test/zdtm/static/socket-tcp-fin-wait1.c @@ -141,7 +141,7 @@ int main(int argc, char **argv) return 1; } - if (write(fd, TEST_MSG + 2, sizeof(TEST_MSG) - 2) != sizeof(TEST_MSG) - 2) { + if (write(fd, &TEST_MSG[2], sizeof(TEST_MSG) - 2) != sizeof(TEST_MSG) - 2) { pr_err("write"); return 1; } From 5d451ee939ca4e64f16d3da656e7711f4bfcee7f Mon Sep 17 00:00:00 2001 From: Nidhi Gupta Date: Thu, 7 Nov 2019 14:38:42 +0530 Subject: [PATCH 156/277] Add File-based Java Functional Tests Signed-off-by: Nidhi Gupta --- test/javaTests/README.md | 8 + .../criu/java/tests/CheckpointRestore.java | 4 +- .../src/org/criu/java/tests/FileRead.java | 2 +- .../src/org/criu/java/tests/Helper.java | 39 +++- .../org/criu/java/tests/MemoryMappings.java | 121 +++++++++++ .../org/criu/java/tests/MultipleFileRead.java | 203 ++++++++++++++++++
.../criu/java/tests/MultipleFileWrite.java | 140 ++++++++++++ .../src/org/criu/java/tests/ReadWrite.java | 119 ++++++++++ test/javaTests/test.xml | 30 +++ 9 files changed, 659 insertions(+), 7 deletions(-) create mode 100644 test/javaTests/src/org/criu/java/tests/MemoryMappings.java create mode 100644 test/javaTests/src/org/criu/java/tests/MultipleFileRead.java create mode 100644 test/javaTests/src/org/criu/java/tests/MultipleFileWrite.java create mode 100644 test/javaTests/src/org/criu/java/tests/ReadWrite.java diff --git a/test/javaTests/README.md b/test/javaTests/README.md index cb779285ed..6707416779 100644 --- a/test/javaTests/README.md +++ b/test/javaTests/README.md @@ -23,6 +23,14 @@ CAP_SETUID Here we test the File-Based Java APIs by checkpointing the application in the following scenarios and verifying the contents of the file after restore: - Reading and writing in the same file. (FileRead.java) +- Read from a file and write its content to another file. (ReadWrite.java) +- Reading from multiple files and writing their content to another file. (MultipleFileRead) +- Reading from a file and writing its content to multiple files. (MultipleFileWrite) + +## Memory mapping Java APIs + +Here we test the Memory Mapping APIs by checkpointing the application in following scenario and verifying the contents after restore: +- Memory-mapping a file and writing its content to another file. 
(MemoryMappings.java) ### Prerequisites for running the tests: - Maven diff --git a/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java b/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java index 968488191d..b848c9938e 100644 --- a/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java +++ b/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java @@ -77,7 +77,7 @@ void suiteSetup() throws IOException { private void testSetup(String testName) throws IOException { Path testFolderPath = Paths.get(outputFolder + testName + "/"); if (!Files.exists(testFolderPath)) { - System.out.println("Test Folder does not exist creating it"); + System.out.println("Creating the test folder"); Files.createDirectory(testFolderPath); } } @@ -245,7 +245,7 @@ public void runtest(String testName, String checkpointOpt, String restoreOpt) th Assert.assertNotEquals(currentState, Helper.STATE_TERMINATE, testName + ": ERROR: Checkpoint-Restore failed"); Assert.assertNotEquals(currentState, Helper.STATE_FAIL, testName + ": ERROR: Test Failed, Check Log for details"); Assert.assertEquals(currentState, Helper.STATE_PASS, testName + " ERROR: Unexpected State of Mapped Buffer"); - System.out.println("-----" + "PASS" + "-----"); + System.out.println("----- " + "PASS" + " -----"); } diff --git a/test/javaTests/src/org/criu/java/tests/FileRead.java b/test/javaTests/src/org/criu/java/tests/FileRead.java index d94a14112a..d8851a73ed 100644 --- a/test/javaTests/src/org/criu/java/tests/FileRead.java +++ b/test/javaTests/src/org/criu/java/tests/FileRead.java @@ -50,7 +50,7 @@ public static void main(String[] args) { /* * Mapped Byte Buffer should be in init state at the beginning of test */ - if ('I' != b.getChar(Helper.MAPPED_INDEX)) { + if (Helper.STATE_INIT != b.getChar(Helper.MAPPED_INDEX)) { logger.log(Level.SEVERE, "Error: Error in memory mapping, test is not in init state"); b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); System.exit(1); diff --git 
a/test/javaTests/src/org/criu/java/tests/Helper.java b/test/javaTests/src/org/criu/java/tests/Helper.java index d608fba47d..fdf20bb521 100644 --- a/test/javaTests/src/org/criu/java/tests/Helper.java +++ b/test/javaTests/src/org/criu/java/tests/Helper.java @@ -1,9 +1,6 @@ package org.criu.java.tests; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; +import java.io.*; import java.nio.MappedByteBuffer; import java.util.logging.FileHandler; import java.util.logging.Level; @@ -96,4 +93,38 @@ static void checkpointAndWait(MappedByteBuffer b, Logger logger) { System.exit(1); } } + + + /** + * Compare two files and return true if their content is similar. + * + * @param readFile File 1 whose content has to be compared. + * @param writeFile File 2 whose content has to be compared. + * @return true if the files are similar, false otherwise. + * @throws IOException + */ + static boolean compare(File readFile, File writeFile) throws IOException { + BufferedReader bir = new BufferedReader(new FileReader(readFile)); + BufferedReader bor = new BufferedReader(new FileReader(writeFile)); + String si, so; + si = bir.readLine(); + so = bor.readLine(); + while (null != si && null != so) { + if (!si.equals(so)) { + return false; + } + + si = bir.readLine(); + so = bor.readLine(); + } + + if ((null == si) && (null == so)) { + return true; + } + bir.close(); + bor.close(); + + return false; + } + } diff --git a/test/javaTests/src/org/criu/java/tests/MemoryMappings.java b/test/javaTests/src/org/criu/java/tests/MemoryMappings.java new file mode 100644 index 0000000000..4ac6f4a17f --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/MemoryMappings.java @@ -0,0 +1,121 @@ +package org.criu.java.tests; + +import java.io.*; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import 
java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class MemoryMappings { + private static String TESTNAME = "MemoryMappings"; + + /** + * Map a file to memory and write the mapped data into a file, + * checkpointing and restoring in between. + * + * @param args Not used + */ + public static void main(String[] args) { + MappedByteBuffer b = null; + Logger logger = null; + + try { + MappedByteBuffer testBuffer; + char ch; + int i = 1; + boolean similar; + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + File f = new File(Helper.MEMORY_MAPPED_FILE_NAME); + File readFile = new File(Helper.SOURCE_FOLDER + "/" + "ReadWrite.java"); + File writeFile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + TESTNAME + "/" + "MemoryMappings_file.txt"); + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + String pid = bean.getName(); + int val = Helper.init(TESTNAME, pid, logger); + if (0 != val) { + logger.log(Level.SEVERE, "Helper.init returned a non-zero code."); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Test init done; pid written to pid file; beginning with test"); + + FileChannel channel = FileChannel.open(f.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + b = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + /* + * Mapped Byte Buffer should be in init state at the beginning of test + */ + if (Helper.STATE_INIT != b.getChar(Helper.MAPPED_INDEX)) { + logger.log(Level.SEVERE, "Error: Error in memory mapping, test is not in init state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Checking existence of file to be memory mapped"); + if (!readFile.exists()) { + logger.log(Level.SEVERE, "Error: File from which to read does not exist"); + b.putChar(Helper.MAPPED_INDEX, 
Helper.STATE_END); + System.exit(1); + } + + channel = FileChannel.open(readFile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + testBuffer = channel.map(MapMode.READ_WRITE, 0, readFile.length()); + channel.close(); + + if (writeFile.exists()) { + writeFile.delete(); + } + boolean newFile = writeFile.createNewFile(); + if (!newFile) { + logger.log(Level.SEVERE, "Error: Cannot create a new file to write to."); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + BufferedWriter brw = new BufferedWriter(new FileWriter(writeFile)); + + while (testBuffer.hasRemaining()) { + ch = (char) testBuffer.get(); + brw.write(ch); + i++; + if (200 == i) { + logger.log(Level.INFO, "Going to checkpoint"); + Helper.checkpointAndWait(b, logger); + logger.log(Level.INFO, "Test has been restored!"); + } + } + + brw.close(); + logger.log(Level.INFO, "Comparing contents of the file"); + + similar = Helper.compare(readFile, writeFile); + if (!similar) { + logger.log(Level.SEVERE, "Error: Files are not similar after writing"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + logger.log(Level.INFO, "Data was read and written correctly!"); + logger.log(Level.INFO, Helper.PASS_MESSAGE); + brw.close(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + System.exit(0); + + } catch (Exception e) { + if (null != logger) { + + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + e.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occurred:" + e); + logger.log(Level.FINE, writer.toString()); + } + + if (null != b) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + System.exit(5); + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/MultipleFileRead.java b/test/javaTests/src/org/criu/java/tests/MultipleFileRead.java new file mode 100644 index 0000000000..7b023673e0 --- /dev/null +++ 
b/test/javaTests/src/org/criu/java/tests/MultipleFileRead.java @@ -0,0 +1,203 @@ +package org.criu.java.tests; + +import java.io.*; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class MultipleFileRead { + private static String TESTNAME = "MultipleFileRead"; + + /** + * @param readFile1 File 1 whose contents are read. + * @param readFile2 File 2 whose contents are read. + * @param writeFile File in which data has been written to. + * @return true if the data written is as expected, false otherwise. + * @throws IOException + */ + private static boolean compare(File readFile1, File readFile2, File writeFile) throws IOException { + BufferedReader br1 = new BufferedReader(new FileReader(readFile1)); + BufferedReader br2 = new BufferedReader(new FileReader(readFile2)); + BufferedReader brw = new BufferedReader(new FileReader(writeFile)); + boolean eof1, eof2; + eof1 = false; + eof2 = false; + String inpString, wrtString; + + while (!eof1 || !eof2) { + if (!eof1) { + inpString = br1.readLine(); + if (null == inpString) { + eof1 = true; + } else { + wrtString = brw.readLine(); + if (null == wrtString) { + return false; + } + if (!wrtString.equals(inpString)) { + return false; + } + } + } + if (!eof2) { + inpString = br2.readLine(); + if (null == inpString) { + eof2 = true; + } else { + wrtString = brw.readLine(); + if (null == wrtString) { + return false; + } + if (!wrtString.equals(inpString)) { + return false; + } + } + } + } + + wrtString = brw.readLine(); + if (null != wrtString) { + return false; + } + + br1.close(); + br2.close(); + brw.close(); + + return true; + } + + /** + * Read from multiple files and write their content into another file, + * checkpointing and restoring in between. 
+ * + * @param args Not used. + */ + public static void main(String[] args) { + MappedByteBuffer b = null; + String s; + int i = 0; + Logger logger = null; + try { + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + File f = new File(Helper.MEMORY_MAPPED_FILE_NAME); + File readFile1 = new File(Helper.SOURCE_FOLDER + "/" + "FileRead.java"); + File readFile2 = new File(Helper.SOURCE_FOLDER + "/" + "ReadWrite.java"); + File writeFile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + TESTNAME + "/" + "MultipleFileRead_file.txt"); + boolean eofFile1 = false, eofFile2 = false, check; + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + String pid = bean.getName(); + int val = Helper.init(TESTNAME, pid, logger); + if (0 != val) { + logger.log(Level.SEVERE, "Helper.init returned a non-zero code."); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Test init done; pid written to pid file; beginning with test"); + + FileChannel channel = FileChannel.open(f.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + b = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + /* + * Mapped Byte Buffer should be in init state at the beginning of test + */ + if (b.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT) { + logger.log(Level.SEVERE, "Error: Error in memory mapping, test is not in init state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Checking existence of the read files"); + + if (!readFile1.exists()) { + logger.log(Level.SEVERE, "Error: File from which to read does not exist"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (!readFile2.exists()) { + logger.log(Level.SEVERE, "Error: File from which to read does not exist"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (writeFile.exists()) { + writeFile.delete(); + } + 
logger.log(Level.INFO, "Creating writeFile"); + boolean newFile = writeFile.createNewFile(); + if (!newFile) { + logger.log(Level.SEVERE, "Error: Cannot create a new file to write to."); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + BufferedReader br1 = new BufferedReader(new FileReader(readFile1)); + BufferedReader br2 = new BufferedReader(new FileReader(readFile2)); + BufferedWriter brw = new BufferedWriter(new FileWriter(writeFile)); + + logger.log(Level.INFO, "Writing in file"); + + while (!eofFile1 || !eofFile2) { + if (!eofFile1) { + s = br1.readLine(); + i++; + if (null == s) { + eofFile1 = true; + } else { + brw.write(s + "\n"); + } + } + if (!eofFile2) { + s = br2.readLine(); + i++; + if (null == s) { + eofFile2 = true; + } else { + brw.write(s + "\n"); + } + } + if (10 == i) { + /* + * Checkpoint and Restore + */ + logger.log(Level.INFO, "Going to checkpoint"); + Helper.checkpointAndWait(b, logger); + logger.log(Level.INFO, "Test has been restored!"); + } + } + brw.flush(); + logger.log(Level.INFO, "Checking the content of the file"); + check = compare(readFile1, readFile2, writeFile); + + if (!check) { + logger.log(Level.SEVERE, "Error: Files are not similar after writing"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + logger.log(Level.INFO, "The file has been written as expected"); + logger.log(Level.INFO, Helper.PASS_MESSAGE); + br1.close(); + br2.close(); + brw.close(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + System.exit(0); + + } catch (Exception e) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + e.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occurred:" + e); + logger.log(Level.FINE, writer.toString()); + } + + if (null != b) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + System.exit(5); + } + } +} diff --git 
a/test/javaTests/src/org/criu/java/tests/MultipleFileWrite.java b/test/javaTests/src/org/criu/java/tests/MultipleFileWrite.java new file mode 100644 index 0000000000..76d287a07c --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/MultipleFileWrite.java @@ -0,0 +1,140 @@ +package org.criu.java.tests; + +import java.io.*; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class MultipleFileWrite { + private static String TESTNAME = "MultipleFileWrite"; + + /** + * Reads from a file and write its content into multiple files, + * checkpointing and restoring in between. + * + * @param args Not used. + */ + public static void main(String[] args) { + MappedByteBuffer b = null; + String s, pid; + int i = 1; + Logger logger = null; + boolean similar1, similar2; + try { + File readFile = new File(Helper.SOURCE_FOLDER + "/" + "FileRead.java"); + File writeFile1 = new File(Helper.OUTPUT_FOLDER_NAME + "/" + TESTNAME + "/" + TESTNAME + "1_file.txt"); + File writeFile2 = new File(Helper.OUTPUT_FOLDER_NAME + "/" + TESTNAME + "/" + TESTNAME + "2_file.txt"); + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." 
+ TESTNAME); + File f = new File(Helper.MEMORY_MAPPED_FILE_NAME); + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + pid = bean.getName(); + int val = Helper.init(TESTNAME, pid, logger); + if (0 != val) { + logger.log(Level.SEVERE, "Helper.init returned a non-zero code."); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Test init done; pid written to pid file; beginning with test"); + + FileChannel channel = FileChannel.open(f.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + b = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + /* + * Mapped Byte Buffer should be in init state at the beginning of test + */ + if (Helper.STATE_INIT != b.getChar(Helper.MAPPED_INDEX)) { + logger.log(Level.SEVERE, "Error: Error in memory mapping, test is not in init state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Checking existence of read files!"); + + if (!readFile.exists()) { + logger.log(Level.SEVERE, "Error: File from which to read does not exist"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (writeFile1.exists()) { + writeFile1.delete(); + } + boolean newFile = writeFile1.createNewFile(); + if (!newFile) { + logger.log(Level.SEVERE, "Error: Cannot create a new file to write to."); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + if (writeFile2.exists()) { + writeFile2.delete(); + } + newFile = writeFile2.createNewFile(); + if (!newFile) { + logger.log(Level.SEVERE, "Error: Cannot create a new file to write to."); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Created write files"); + + BufferedReader br = new BufferedReader(new FileReader(readFile)); + BufferedWriter bw1 = new BufferedWriter(new FileWriter(writeFile1)); + BufferedWriter bw2 = new BufferedWriter(new 
FileWriter(writeFile2)); + + s = br.readLine(); + + while (null != s) { + bw1.write(s + "\n"); + bw2.write(s + "\n"); + if (90 == i) { + /* + * Checkpoint and Restore + */ + logger.log(Level.INFO, "Going to checkpoint"); + Helper.checkpointAndWait(b, logger); + logger.log(Level.INFO, "Test has been restored!"); + } + + i++; + s = br.readLine(); + } + + bw1.flush(); + bw2.flush(); + logger.log(Level.INFO, "Checking files have been written correctly"); + + similar1 = Helper.compare(readFile, writeFile1); + similar2 = Helper.compare(readFile, writeFile2); + + if (!similar1 || !similar2) { + logger.log(Level.SEVERE, "Error: Written data is not identical to the data read"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + logger.log(Level.INFO, "Content of files is as expected"); + logger.log(Level.INFO, Helper.PASS_MESSAGE); + br.close(); + bw1.close(); + bw2.close(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + System.exit(0); + + } catch (Exception e) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + e.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occurred:" + e); + logger.log(Level.FINE, writer.toString()); + } + + if (null != b) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + System.exit(5); + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/ReadWrite.java b/test/javaTests/src/org/criu/java/tests/ReadWrite.java new file mode 100644 index 0000000000..fa98447ed7 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/ReadWrite.java @@ -0,0 +1,119 @@ +package org.criu.java.tests; + +import java.io.*; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import 
java.util.logging.Logger; + +class ReadWrite { + private static String TESTNAME = "ReadWrite"; + + /** + * Read from a file and write its content into another file, + * checkpointing and restoring in between. + * + * @param args Not used. + */ + public static void main(String[] args) { + int i = 0; + String s, pid; + boolean similar; + MappedByteBuffer b = null; + Logger logger = null; + try { + File readFile = new File(Helper.SOURCE_FOLDER + "/" + "FileRead.java"); + File writeFile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + TESTNAME + "/" + "ReadWrite_file.txt"); + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + File f = new File(Helper.MEMORY_MAPPED_FILE_NAME); + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + pid = bean.getName(); + int val = Helper.init(TESTNAME, pid, logger); + if (0 != val) { + logger.log(Level.SEVERE, "Helper.init returned a non-zero code."); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Test init done; pid written to pid file; beginning with test"); + FileChannel channel = FileChannel.open(f.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + b = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + /* + * Mapped Byte Buffer should be in init state at the beginning of test + */ + if (Helper.STATE_INIT != b.getChar(Helper.MAPPED_INDEX)) { + logger.log(Level.SEVERE, "Error: Error in memory mapping, test is not in init state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Checking existence of files to be read!"); + if (!readFile.exists()) { + logger.log(Level.SEVERE, "Error: File from which to read does not exist"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + if (writeFile.exists()) { + writeFile.delete(); + } + logger.log(Level.INFO, "Creating the writeFile"); + boolean newFile = 
writeFile.createNewFile(); + if (!newFile) { + logger.log(Level.SEVERE, "Error: Cannot create a new file to write to."); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + BufferedReader brr = new BufferedReader(new FileReader(readFile)); + BufferedWriter brw = new BufferedWriter(new FileWriter(writeFile)); + logger.log(Level.INFO, "Start writing"); + + s = brr.readLine(); + + while (null != s) { + i++; + brw.write(s + "\n"); + + if (50 == i) { + /* + * Checkpoint and Restore + */ + logger.log(Level.INFO, "Going to checkpoint"); + Helper.checkpointAndWait(b, logger); + logger.log(Level.INFO, "Test has been restored!"); + } + s = brr.readLine(); + } + + brw.flush(); + logger.log(Level.INFO, "Checking content of the files."); + similar = Helper.compare(readFile, writeFile); + + if (!similar) { + logger.log(Level.SEVERE, "Error: Files are not similar after writing"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + logger.log(Level.INFO, "Content of file is as expected"); + logger.log(Level.INFO, Helper.PASS_MESSAGE); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + System.exit(0); + } catch (Exception e) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + e.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occurred:" + e); + logger.log(Level.FINE, writer.toString()); + } + if (null != b) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + System.exit(5); + } + } +} diff --git a/test/javaTests/test.xml b/test/javaTests/test.xml index 8ff67c5e0d..b73a31db29 100644 --- a/test/javaTests/test.xml +++ b/test/javaTests/test.xml @@ -4,10 +4,40 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From 4405944d108e5cf32ad1807248c7ead7ce91d98c Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 20 Nov 2019 11:01:33 +0300 Subject: [PATCH 157/277] travis: ignore fails of podman-test until it will not be 
fixed. Signed-off-by: Andrei Vagin --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index b27dbfe7b0..e6e4101911 100644 --- a/.travis.yml +++ b/.travis.yml @@ -81,6 +81,7 @@ matrix: - env: TR_ARCH=docker-test - env: TR_ARCH=fedora-rawhide - env: TR_ARCH=local GCOV=1 + - env: TR_ARCH=podman-test script: - sudo make CCACHE=1 -C scripts/travis $TR_ARCH after_success: From 41bcadac81faacb5ec5af22f4cb0b43f64917567 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 21 Nov 2019 01:24:44 +0300 Subject: [PATCH 158/277] kerndat: check whether the new mount API is supported of not Signed-off-by: Andrei Vagin --- Makefile.config | 2 +- .../arch/arm/plugins/std/syscalls/syscall.def | 3 ++ .../plugins/std/syscalls/syscall-ppc64.tbl | 3 ++ .../plugins/std/syscalls/syscall-s390.tbl | 3 ++ .../x86/plugins/std/syscalls/syscall_32.tbl | 3 ++ .../x86/plugins/std/syscalls/syscall_64.tbl | 3 ++ criu/include/kerndat.h | 1 + criu/include/linux/mount.h | 35 +++++++++++++++++++ criu/kerndat.c | 16 +++++++++ scripts/feature-tests.mak | 12 +++++++ 10 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 criu/include/linux/mount.h diff --git a/Makefile.config b/Makefile.config index 5af3fed389..81aae24f81 100644 --- a/Makefile.config +++ b/Makefile.config @@ -47,7 +47,7 @@ export DEFINES += $(FEATURE_DEFINES) export CFLAGS += $(FEATURE_DEFINES) FEATURES_LIST := TCP_REPAIR STRLCPY STRLCAT PTRACE_PEEKSIGINFO \ - SETPROCTITLE_INIT MEMFD TCP_REPAIR_WINDOW + SETPROCTITLE_INIT MEMFD TCP_REPAIR_WINDOW FSCONFIG # $1 - config name define gen-feature-test diff --git a/compel/arch/arm/plugins/std/syscalls/syscall.def b/compel/arch/arm/plugins/std/syscalls/syscall.def index 721ff16dc0..d5bdc677e2 100644 --- a/compel/arch/arm/plugins/std/syscalls/syscall.def +++ b/compel/arch/arm/plugins/std/syscalls/syscall.def @@ -112,3 +112,6 @@ userfaultfd 282 388 (int flags) fallocate 47 352 (int fd, int mode, loff_t offset, loff_t len) cacheflush ! 
983042 (void *start, void *end, int flags) ppoll 73 336 (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize) +fsopen 430 430 (char *fsname, unsigned int flags) +fsconfig 431 431 (int fd, unsigned int cmd, const char *key, const char *value, int aux) +fsmount 432 432 (int fd, unsigned int flags, unsigned int attr_flags) diff --git a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl index 3b30790402..4e283d5e93 100644 --- a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl +++ b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl @@ -108,3 +108,6 @@ __NR_gettimeofday 78 sys_gettimeofday (struct timeval *tv, struct timezone *tz) __NR_preadv 320 sys_preadv_raw (int fd, struct iovec *iov, unsigned long nr, unsigned long pos_l, unsigned long pos_h) __NR_userfaultfd 364 sys_userfaultfd (int flags) __NR_ppoll 281 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize) +__NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags) +__NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux) +__NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags) diff --git a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl index cc13a63dd5..fd48e39507 100644 --- a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl +++ b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl @@ -108,3 +108,6 @@ __NR_userfaultfd 355 sys_userfaultfd (int flags) __NR_preadv 328 sys_preadv_raw (int fd, struct iovec *iov, unsigned long nr, unsigned long pos_l, unsigned long pos_h) __NR_gettimeofday 78 sys_gettimeofday (struct timeval *tv, struct timezone *tz) __NR_ppoll 302 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, 
size_t sigsetsize) +__NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags) +__NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux) +__NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags) diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl index 7903ab150a..038aeb4f75 100644 --- a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl +++ b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl @@ -96,3 +96,6 @@ __NR_seccomp 354 sys_seccomp (unsigned int op, unsigned int flags, const char __NR_memfd_create 356 sys_memfd_create (const char *name, unsigned int flags) __NR_userfaultfd 374 sys_userfaultfd (int flags) __NR_ppoll 309 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize) +__NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags) +__NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux) +__NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags) diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl index 4ac9164ea1..215f320267 100644 --- a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl +++ b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl @@ -107,3 +107,6 @@ __NR_kcmp 312 sys_kcmp (pid_t pid1, pid_t pid2, int type, unsigned long idx1 __NR_memfd_create 319 sys_memfd_create (const char *name, unsigned int flags) __NR_userfaultfd 323 sys_userfaultfd (int flags) __NR_ppoll 271 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize) +__NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags) +__NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux) +__NR_fsmount 432 sys_fsmount (int fd, unsigned 
int flags, unsigned int attr_flags) diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h index d93e07813f..771195860c 100644 --- a/criu/include/kerndat.h +++ b/criu/include/kerndat.h @@ -65,6 +65,7 @@ struct kerndat_s { bool x86_has_ptrace_fpu_xsave_bug; bool has_inotify_setnextwd; bool has_kcmp_epoll_tfd; + bool has_fsopen; }; extern struct kerndat_s kdat; diff --git a/criu/include/linux/mount.h b/criu/include/linux/mount.h new file mode 100644 index 0000000000..aa6be69ec6 --- /dev/null +++ b/criu/include/linux/mount.h @@ -0,0 +1,35 @@ +#ifndef _CRIU_LINUX_MOUNT_H +#define _CRIU_LINUX_MOUNT_H + +#include "common/config.h" +#include "compel/plugins/std/syscall-codes.h" + +#ifdef CONFIG_HAS_FSCONFIG +#include +#else +enum fsconfig_command { + FSCONFIG_SET_FLAG = 0, /* Set parameter, supplying no value */ + FSCONFIG_SET_STRING = 1, /* Set parameter, supplying a string value */ + FSCONFIG_SET_BINARY = 2, /* Set parameter, supplying a binary blob value */ + FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */ + FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */ + FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */ + FSCONFIG_CMD_CREATE = 6, /* Invoke superblock creation */ + FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */ +}; +#endif + +static inline int sys_fsopen(const char *fsname, unsigned int flags) +{ + return syscall(__NR_fsopen, fsname, flags); +} +static inline int sys_fsconfig(int fd, unsigned int cmd, const char *key, const char *value, int aux) +{ + return syscall(__NR_fsconfig, fd, cmd, key, value, aux); +} +static inline int sys_fsmount(int fd, unsigned int flags, unsigned int attr_flags) +{ + return syscall(__NR_fsmount, fd, flags, attr_flags); +} + +#endif diff --git a/criu/kerndat.c b/criu/kerndat.c index 39cacb8fef..b0dd831356 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -723,6 +723,20 @@ static int kerndat_has_inotify_setnextwd(void) return ret; 
} +static int kerndat_has_fsopen(void) +{ + if (syscall(__NR_fsopen, NULL, -1) != -1) { + pr_err("fsopen should fail\n"); + return -1; + } + if (errno == ENOSYS) + pr_info("The new mount API (fsopen, fsmount) isn't supported\n"); + else + kdat.has_fsopen = true; + + return 0; +} + static int has_kcmp_epoll_tfd(void) { kcmp_epoll_slot_t slot = { }; @@ -1043,6 +1057,8 @@ int kerndat_init(void) ret = kerndat_has_inotify_setnextwd(); if (!ret) ret = has_kcmp_epoll_tfd(); + if (!ret) + ret = kerndat_has_fsopen(); kerndat_lsm(); kerndat_mmap_min_addr(); diff --git a/scripts/feature-tests.mak b/scripts/feature-tests.mak index e39d97bb11..39ddfd0533 100644 --- a/scripts/feature-tests.mak +++ b/scripts/feature-tests.mak @@ -136,3 +136,15 @@ ENTRY(main) nop END(main) endef + +define FEATURE_TEST_FSCONFIG + +#include + +int main(void) +{ + if (FSCONFIG_CMD_CREATE > 0) + return 0; + return 0; +} +endef From cb1f6022d754c5a9e511e41876aa14eb720a9bc1 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Mon, 25 Nov 2019 09:50:08 +0300 Subject: [PATCH 159/277] util: introduce the mount_detached_fs helper Signed-off-by: Andrei Vagin --- criu/include/util.h | 2 ++ criu/util.c | 26 ++++++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/criu/include/util.h b/criu/include/util.h index a14be72293..778b1b1197 100644 --- a/criu/include/util.h +++ b/criu/include/util.h @@ -378,4 +378,6 @@ static inline void print_stack_trace(pid_t pid) {} ___ret; \ }) +extern int mount_detached_fs(const char *fsname); + #endif /* __CR_UTIL_H__ */ diff --git a/criu/util.c b/criu/util.c index 2a3d7abcab..8cd9f38dc8 100644 --- a/criu/util.c +++ b/criu/util.c @@ -27,6 +27,8 @@ #include #include +#include "linux/mount.h" + #include "kerndat.h" #include "page.h" #include "util.h" @@ -1372,3 +1374,27 @@ void print_stack_trace(pid_t pid) free(strings); } #endif + +int mount_detached_fs(const char *fsname) +{ + int fsfd, fd; + + fsfd = sys_fsopen(fsname, 0); + if (fsfd < 0) { + pr_perror("Unable to 
open the %s file system", fsname); + return -1; + } + + if (sys_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) < 0) { + pr_perror("Unable to create the %s file system", fsname); + close(fsfd); + return -1; + } + + fd = sys_fsmount(fsfd, 0, 0); + if (fd < 0) + pr_perror("Unable to mount the %s file system", fsname); + close(fsfd); + return fd; +} + From 08a86c7378bf45c67bb4aae863c2acd3aa0a0e8d Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 21 Nov 2019 01:26:38 +0300 Subject: [PATCH 160/277] mount: use new mount API to open the proc file system It doesn't require to create a temporary directory and mount the proc file system in it. Signed-off-by: Andrei Vagin --- criu/cr-restore.c | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index fad1b38795..2ace4d078d 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -23,6 +23,8 @@ #include #include "common/compiler.h" +#include "linux/mount.h" + #include "clone-noasan.h" #include "cr_options.h" #include "servicefd.h" @@ -1586,27 +1588,39 @@ static void restore_pgid(void) futex_set_and_wake(&rsti(current)->pgrp_set, 1); } +static int __legacy_mount_proc() +{ + char proc_mountpoint[] = "/tmp/crtools-proc.XXXXXX"; + int fd; + + if (mkdtemp(proc_mountpoint) == NULL) { + pr_perror("mkdtemp failed %s", proc_mountpoint); + return -1; + } + + pr_info("Mount procfs in %s\n", proc_mountpoint); + if (mount("proc", proc_mountpoint, "proc", MS_MGC_VAL | MS_NOSUID | MS_NOEXEC | MS_NODEV, NULL)) { + pr_perror("mount failed"); + if (rmdir(proc_mountpoint)) + pr_perror("Unable to remove %s", proc_mountpoint); + return -1; + } + + fd = open_detach_mount(proc_mountpoint); + return fd; +} + static int mount_proc(void) { int fd, ret; - char proc_mountpoint[] = "/tmp/crtools-proc.XXXXXX"; if (root_ns_mask == 0) fd = ret = open("/proc", O_DIRECTORY); else { - if (mkdtemp(proc_mountpoint) == NULL) { - pr_perror("mkdtemp failed %s", 
proc_mountpoint); - return -1; - } - - pr_info("Mount procfs in %s\n", proc_mountpoint); - if (mount("proc", proc_mountpoint, "proc", MS_MGC_VAL | MS_NOSUID | MS_NOEXEC | MS_NODEV, NULL)) { - pr_perror("mount failed"); - rmdir(proc_mountpoint); - return -1; - } - - ret = fd = open_detach_mount(proc_mountpoint); + if (kdat.has_fsopen) + fd = ret = mount_detached_fs("proc"); + else + fd = ret = __legacy_mount_proc(); } if (fd >= 0) { From 2a4fffb01dcb4101279298fc9a644789a3b6dc05 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Mon, 25 Nov 2019 09:51:40 +0300 Subject: [PATCH 161/277] net: use new mount API to open the sysfs file system It doesn't require to create a temporary directory and mount the proc file system in it. Signed-off-by: Andrei Vagin --- criu/net.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/criu/net.c b/criu/net.c index 2285ae4032..75922fe68c 100644 --- a/criu/net.c +++ b/criu/net.c @@ -2162,6 +2162,11 @@ static int mount_ns_sysfs(void) BUG_ON(ns_sysfs_fd != -1); + if (kdat.has_fsopen) { + ns_sysfs_fd = mount_detached_fs("sysfs"); + return ns_sysfs_fd >= 0 ? 0 : -1; + } + /* * A new mntns is required to avoid the race between * open_detach_mount and creating mntns. From e209cd63b354ac06071d63657a3f6eebc853f06e Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Mon, 25 Nov 2019 09:52:25 +0300 Subject: [PATCH 162/277] cgroup: use new mount API to open the cgroup file system It doesn't require to create a temporary directory and mount the proc file system in it. 
Signed-off-by: Andrei Vagin --- criu/cgroup.c | 105 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 82 insertions(+), 23 deletions(-) diff --git a/criu/cgroup.c b/criu/cgroup.c index a66fc960e6..d4c7121673 100644 --- a/criu/cgroup.c +++ b/criu/cgroup.c @@ -8,6 +8,7 @@ #include #include #include + #include "common/list.h" #include "xmalloc.h" #include "cgroup.h" @@ -24,6 +25,8 @@ #include "protobuf.h" #include "images/core.pb-c.h" #include "images/cgroup.pb-c.h" +#include "kerndat.h" +#include "linux/mount.h" /* * This structure describes set of controller groups @@ -542,6 +545,84 @@ static int add_freezer_state(struct cg_controller *controller) return 0; } +static const char namestr[] = "name="; +static int __new_open_cgroupfs(struct cg_ctl *cc) +{ + int fsfd, fd; + char *name; + + fsfd = sys_fsopen("cgroup", 0); + if (fsfd < 0) { + pr_perror("Unable to open the cgroup file system"); + return -1; + } + + if (strstartswith(cc->name, namestr)) { + if (sys_fsconfig(fsfd, FSCONFIG_SET_STRING, + "name", cc->name + strlen(namestr), 0)) { + pr_perror("Unable to configure the cgroup (%s) file system", cc->name); + goto err; + } + } else { + char *saveptr = NULL, *buf = strdupa(cc->name); + name = strtok_r(buf, ",", &saveptr); + while (name) { + if (sys_fsconfig(fsfd, FSCONFIG_SET_FLAG, name, NULL, 0)) { + pr_perror("Unable to configure the cgroup (%s) file system", name); + goto err; + } + name = strtok_r(NULL, ",", &saveptr); + } + } + + if (sys_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)) { + pr_perror("Unable to create the cgroup (%s) file system", cc->name); + goto err; + } + + fd = sys_fsmount(fsfd, 0, 0); + if (fd < 0) + pr_perror("Unable to mount the cgroup (%s) file system", cc->name); + close(fsfd); + + return fd; +err: + close(fsfd); + return -1; +} + +static int open_cgroupfs(struct cg_ctl *cc) +{ + char prefix[] = ".criu.cgmounts.XXXXXX"; + char mopts[1024]; + int fd; + + if (kdat.has_fsopen) + return __new_open_cgroupfs(cc); + + if 
(strstartswith(cc->name, namestr)) + snprintf(mopts, sizeof(mopts), "none,%s", cc->name); + else + snprintf(mopts, sizeof(mopts), "%s", cc->name); + + if (mkdtemp(prefix) == NULL) { + pr_perror("can't make dir for cg mounts"); + return -1; + } + + if (mount("none", prefix, "cgroup", 0, mopts) < 0) { + pr_perror("Unable to mount %s", mopts); + rmdir(prefix); + return -1; + } + + fd = open_detach_mount(prefix); + if (fd < 0) + return -1; + + return fd; +} + static int collect_cgroups(struct list_head *ctls) { struct cg_ctl *cc; @@ -550,8 +631,6 @@ static int collect_cgroups(struct list_head *ctls) list_for_each_entry(cc, ctls, l) { char path[PATH_MAX], *root; - char prefix[] = ".criu.cgmounts.XXXXXX"; - const char namestr[] = "name="; struct cg_controller *cg; struct cg_root_opt *o; @@ -603,27 +682,7 @@ static int collect_cgroups(struct list_head *ctls) return -1; } } else { - char mopts[1024]; - - if (strstartswith(cc->name, namestr)) - snprintf(mopts, sizeof(mopts), "none,%s", cc->name); - else - snprintf(mopts, sizeof(mopts), "%s", cc->name); - - if (mkdtemp(prefix) == NULL) { - pr_perror("can't make dir for cg mounts"); - return -1; - } - - if (mount("none", prefix, "cgroup", 0, mopts) < 0) { - pr_perror("couldn't mount %s", mopts); - rmdir(prefix); - return -1; - } - - fd = open_detach_mount(prefix); - if (fd < 0) - return -1; + fd = open_cgroupfs(cc); } path_pref_len = snprintf(path, PATH_MAX, "/proc/self/fd/%d", fd); From fd4d0e6b5772bda3972c31360e7782813195c2d3 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 26 Nov 2019 07:26:31 +0300 Subject: [PATCH 163/277] readme: github pull-requests is the preferred way to contribute We will continue accepting patches. Signed-off-by: Andrei Vagin --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 558e871601..6a578b9530 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ Here are some useful hints to get involved. 
* CRIU does need [extensive testing](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Atesting); * Documentation is always hard, we have [some information](https://criu.org/Category:Empty_articles) that is to be extracted from people's heads into wiki pages as well as [some texts](https://criu.org/Category:Editor_help_needed) that all need to be converted into useful articles; * Feedback is expected on the github issues page and on the [mailing list](https://lists.openvz.org/mailman/listinfo/criu); -* For historical reasons we do not accept PRs, instead [patches are welcome](http://criu.org/How_to_submit_patches); +* We accept github pull requests and this is the preferred way to contribute to CRIU. If you prefer to send patches by email, you are welcome to send them to [the devel list](http://criu.org/How_to_submit_patches); * Spread the word about CRIU in [social networks](http://criu.org/Contacts); * If you're giving a talk about CRIU -- let us know, we'll mention it on the [wiki main page](https://criu.org/News/events); From f728391d3633d1133ef3cd2ea5a20aaaade71ebe Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Tue, 12 Nov 2019 14:31:08 +0300 Subject: [PATCH 164/277] restorer/inotify: reorder inotify cleanup after waiting helpers and zombies We've seen ppoll interrupted with signal in VZ7 CT migration tests, that is because in the beginning of CR_STATE_RESTORE_SIGCHLD zombies and helpers die, and that can trigger SIGCHILDs sent to their parents. Adding additional debug (printing "Task..."
for zombies and helpers) in sigchld_handler I see: (15.644339) pie: 1: Task 10718 exited, status= 0 (15.644349) pie: 1: Cleaning inotify events from 29 (15.644359) pie: 1: Cleaning inotify events from 19 (15.644367) pie: 1: Cleaning inotify events from 10 And previously we had: (05.718449) pie: 104: Cleaning inotify events from 5 (05.718835) pie: 330: Cleaning inotify events from 3 (05.719046) pie: 1: Cleaning inotify events from 23 (05.719164) pie: 80: Cleaning inotify events from 7 (05.719185) pie: 1: Error (criu/pie/restorer.c:1287): Failed to poll from inotify fd: -4 (05.719202) pie: 95: Cleaning inotify events from 6 (05.719269) pie: 1: Error (criu/pie/restorer.c:1890): Restorer fail 1 So reordering cleanup and wait should fix it. Signed-off-by: Pavel Tikhomirov --- criu/pie/restorer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c index dab58add6a..888eb8e650 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -1839,9 +1839,6 @@ long __export_restore_task(struct task_restore_args *args) restore_finish_stage(task_entries_local, CR_STATE_RESTORE); - if (cleanup_current_inotify_events(args)) - goto core_restore_end; - if (wait_helpers(args) < 0) goto core_restore_end; if (wait_zombies(args) < 0) @@ -1854,6 +1851,9 @@ long __export_restore_task(struct task_restore_args *args) goto core_restore_end; } + if (cleanup_current_inotify_events(args)) + goto core_restore_end; + if (!args->compatible_mode) { ret = sys_sigaction(SIGCHLD, &args->sigchld_act, NULL, sizeof(k_rtsigset_t)); From a1533cde0dc2e87e525c883e3d46f107cffe9530 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 29 Nov 2019 10:57:29 +0300 Subject: [PATCH 165/277] mount: Order call_helper_process calls When we do clone threads in a later stage of restore procedure it may race with helpers which do call clone_noasan by self.
Thus we need to walk over each clone_noasan call and figure out if calling it without last_pid lock is safe. - open_mountpoint: called by fusectl_dump, dump_empty_fs, binfmt_misc_dump, tmpfs_dump -- they all are processing dump stage, thus safe - call_helper_process: try_remount_writable -- called from various places in reg-files.c, in particular open_reg_by_id called in parallel with other threads, needs a lock remount_readonly_mounts -- called from sigreturn_restore, so in parallel, needs a lock - call_in_child_process: prepare_net_namespaces -- called from prepare_namespace which runs before we start forking, no need for lock Thus call_helper_process should use lock_last_pid and unlock_last_pid helpers and wait for subprocess to finish. Same time put a warning text into clone_noasan comment so next time we need to use it we would recall the pitfalls. v2: - fix uninitialized ret variable v3: - use exit_code instead of ret Signed-off-by: Cyrill Gorcunov --- criu/clone-noasan.c | 9 +++++++++ criu/mount.c | 21 ++++++++++++++++----- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/criu/clone-noasan.c b/criu/clone-noasan.c index 5ca280eb83..5f1858d4d0 100644 --- a/criu/clone-noasan.c +++ b/criu/clone-noasan.c @@ -18,6 +18,15 @@ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69863 * * So the only way is to put this wrapper in separate non-instrumented file + * + * WARNING: When calling clone_noasan make sure you're not sitting in a later + * __restore__ phase where other tasks might be creating threads, otherwise + * all calls to clone_noasan should be guarded with + * + * lock_last_pid + * clone_noasan + * ... wait for process to finish ...
+ * unlock_last_pid */ int clone_noasan(int (*fn)(void *), int flags, void *arg) { diff --git a/criu/mount.c b/criu/mount.c index 52e70d3767..24a8516c64 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -3738,27 +3738,38 @@ struct ns_desc mnt_ns_desc = NS_DESC_ENTRY(CLONE_NEWNS, "mnt"); static int call_helper_process(int (*call)(void *), void *arg) { - int pid, status; + int pid, status, exit_code = -1; + + /* + * Running new helper process on the restore must be + * done under last_pid mutex: other tasks may be restoring + * threads and the PID we need there might be occupied by + * this clone() call. + */ + lock_last_pid(); pid = clone_noasan(call, CLONE_VFORK | CLONE_VM | CLONE_FILES | CLONE_IO | CLONE_SIGHAND | CLONE_SYSVSEM, arg); if (pid == -1) { pr_perror("Can't clone helper process"); - return -1; + goto out; } errno = 0; if (waitpid(pid, &status, __WALL) != pid) { pr_perror("Unable to wait %d", pid); - return -1; + goto out; } if (status) { pr_err("Bad child exit status: %d\n", status); - return -1; + goto out; } - return 0; + exit_code = 0; +out: + unlock_last_pid(); + return exit_code; } static int ns_remount_writable(void *arg) From 55154963eb3cc6b6f0edb530f1e3562a32d4fd52 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 14 Nov 2019 14:41:04 +0300 Subject: [PATCH 166/277] unix: sysctl -- Preserve max_dgram_qlen value The /proc/sys/net/unix/max_dgram_qlen is a per-net variable and we already noticed that systemd inside a container may change its value (for example it sets it to 512 by now instead of kernel's default value 10), thus we need keep it inside image and restore then. 
Signed-off-by: Cyrill Gorcunov Signed-off-by: Alexander Mikhalitsyn Signed-off-by: Alexander Mikhalitsyn --- criu/net.c | 104 +++++++++++++++++++++++++++++++++++++++++++- images/netdev.proto | 1 + 2 files changed, 104 insertions(+), 1 deletion(-) diff --git a/criu/net.c b/criu/net.c index 75922fe68c..2e70d81615 100644 --- a/criu/net.c +++ b/criu/net.c @@ -210,6 +210,19 @@ char *devconfs6[] = { #define MAX_CONF_OPT_PATH IFNAMSIZ+60 #define MAX_STR_CONF_LEN 200 +static const char *unix_conf_entries[] = { + "max_dgram_qlen", +}; + +/* + * MAX_CONF_UNIX_PATH = (sizeof(CONF_UNIX_FMT) - strlen("%s")) + * + MAX_CONF_UNIX_OPT_PATH + */ +#define CONF_UNIX_BASE "net/unix" +#define CONF_UNIX_FMT CONF_UNIX_BASE"/%s" +#define MAX_CONF_UNIX_OPT_PATH 32 +#define MAX_CONF_UNIX_PATH (sizeof(CONF_UNIX_FMT) + MAX_CONF_UNIX_OPT_PATH - 2) + static int net_conf_op(char *tgt, SysctlEntry **conf, int n, int op, char *proto, struct sysctl_req *req, char (*path)[MAX_CONF_OPT_PATH], int size, char **devconfs, SysctlEntry **def_conf) @@ -339,6 +352,72 @@ static int ipv6_conf_op(char *tgt, SysctlEntry **conf, int n, int op, SysctlEntr devconfs6, def_conf); } +static int unix_conf_op(SysctlEntry ***rconf, size_t *n, int op) +{ + int i, ret = -1, flags = 0; + char path[ARRAY_SIZE(unix_conf_entries)][MAX_CONF_UNIX_PATH] = { }; + struct sysctl_req req[ARRAY_SIZE(unix_conf_entries)] = { }; + SysctlEntry **conf = *rconf; + + if (*n != ARRAY_SIZE(unix_conf_entries)) { + pr_err("unix: Unexpected entries in config (%zu %zu)\n", + *n, ARRAY_SIZE(unix_conf_entries)); + return -EINVAL; + } + + if (opts.weak_sysctls || op == CTL_READ) + flags = CTL_FLAGS_OPTIONAL; + + for (i = 0; i < *n; i++) { + snprintf(path[i], MAX_CONF_UNIX_PATH, CONF_UNIX_FMT, + unix_conf_entries[i]); + req[i].name = path[i]; + req[i].flags = flags; + + switch (conf[i]->type) { + case SYSCTL_TYPE__CTL_32: + req[i].type = CTL_32; + req[i].arg = &conf[i]->iarg; + break; + default: + pr_err("unix: Unknown config type %d\n", + 
conf[i]->type); + return -1; + } + } + + ret = sysctl_op(req, *n, op, CLONE_NEWNET); + if (ret < 0) { + pr_err("unix: Failed to %s %s/\n", + (op == CTL_READ) ? "read" : "write", + CONF_UNIX_BASE); + return -1; + } + + if (op == CTL_READ) { + bool has_entries = false; + + for (i = 0; i < *n; i++) { + if (req[i].flags & CTL_FLAGS_HAS) { + conf[i]->has_iarg = true; + if (!has_entries) + has_entries = true; + } + } + + /* + * Zap the whole section of data. + * Unix conf is optional. + */ + if (!has_entries) { + *n = 0; + *rconf = NULL; + } + } + + return 0; +} + /* * I case if some entry is missing in * the kernel, simply write DEVCONFS_UNUSED @@ -1824,6 +1903,8 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds) int ret = -1; int i; NetnsEntry netns = NETNS_ENTRY__INIT; + SysctlEntry *unix_confs = NULL; + size_t sizex = ARRAY_SIZE(unix_conf_entries); SysctlEntry *def_confs4 = NULL, *all_confs4 = NULL; int size4 = ARRAY_SIZE(devconfs4); SysctlEntry *def_confs6 = NULL, *all_confs6 = NULL; @@ -1840,7 +1921,8 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds) o_buf = buf = xmalloc( i * (sizeof(NetnsId*) + sizeof(NetnsId)) + size4 * (sizeof(SysctlEntry*) + sizeof(SysctlEntry)) * 2 + - size6 * (sizeof(SysctlEntry*) + sizeof(SysctlEntry)) * 2 + size6 * (sizeof(SysctlEntry*) + sizeof(SysctlEntry)) * 2 + + sizex * (sizeof(SysctlEntry*) + sizeof(SysctlEntry)) ); if (!buf) goto out; @@ -1896,6 +1978,16 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds) } } + netns.n_unix_conf = sizex; + netns.unix_conf = xptr_pull_s(&buf, sizex * sizeof(SysctlEntry*)); + unix_confs = xptr_pull_s(&buf, sizex * sizeof(SysctlEntry)); + + for (i = 0; i < sizex; i++) { + sysctl_entry__init(&unix_confs[i]); + netns.unix_conf[i] = &unix_confs[i]; + netns.unix_conf[i]->type = SYSCTL_TYPE__CTL_32; + } + ret = ipv4_conf_op("default", netns.def_conf4, size4, CTL_READ, NULL); if (ret < 0) goto err_free; @@ -1910,6 +2002,10 @@ static int 
dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds) if (ret < 0) goto err_free; + ret = unix_conf_op(&netns.unix_conf, &netns.n_unix_conf, CTL_READ); + if (ret < 0) + goto err_free; + ret = pb_write_one(img_from_set(fds, CR_FD_NETNS), &netns, PB_NETNS); err_free: xfree(o_buf); @@ -2151,6 +2247,12 @@ static int restore_netns_conf(struct ns_id *ns) ret = ipv6_conf_op("default", (netns)->def_conf6, (netns)->n_def_conf6, CTL_WRITE, NULL); } + if ((netns)->unix_conf) { + ret = unix_conf_op(&(netns)->unix_conf, &(netns)->n_unix_conf, CTL_WRITE); + if (ret) + goto out; + } + ns->net.netns = netns; out: return ret; diff --git a/images/netdev.proto b/images/netdev.proto index 476a92cedb..ae9c995316 100644 --- a/images/netdev.proto +++ b/images/netdev.proto @@ -71,4 +71,5 @@ message netns_entry { repeated netns_id nsids = 7; optional string ext_key = 8; + repeated sysctl_entry unix_conf = 9; } From 1e39d9bde38f84db642a3de584f72261d393f6d3 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Thu, 14 Nov 2019 14:50:43 +0300 Subject: [PATCH 167/277] zdtm: sysctl net.unix.max_dgram_qlen value preservation test Test checks that if the /proc/sys/net/unix/max_dgram_qlen value has been changed in process net namespace, then it is saved after c/r. 
Signed-off-by: Alexander Mikhalitsyn Signed-off-by: Alexander Mikhalitsyn --- test/zdtm/lib/Makefile | 2 +- test/zdtm/lib/sysctl.c | 59 ++++++++++++++++++++++++++ test/zdtm/lib/sysctl.h | 7 +++ test/zdtm/static/Makefile | 1 + test/zdtm/static/netns_sub_sysctl.c | 56 ++++++++++++++++++++++++ test/zdtm/static/netns_sub_sysctl.desc | 4 ++ 6 files changed, 128 insertions(+), 1 deletion(-) create mode 100644 test/zdtm/lib/sysctl.c create mode 100644 test/zdtm/lib/sysctl.h create mode 100644 test/zdtm/static/netns_sub_sysctl.c create mode 100644 test/zdtm/static/netns_sub_sysctl.desc diff --git a/test/zdtm/lib/Makefile b/test/zdtm/lib/Makefile index d2d9f1cc31..b87f36e8f2 100644 --- a/test/zdtm/lib/Makefile +++ b/test/zdtm/lib/Makefile @@ -4,7 +4,7 @@ CFLAGS += $(USERCFLAGS) LIB := libzdtmtst.a -LIBSRC := datagen.c msg.c parseargs.c test.c streamutil.c lock.c ns.c tcp.c fs.c +LIBSRC := datagen.c msg.c parseargs.c test.c streamutil.c lock.c ns.c tcp.c fs.c sysctl.c LIBOBJ := $(LIBSRC:%.c=%.o) BIN := groups diff --git a/test/zdtm/lib/sysctl.c b/test/zdtm/lib/sysctl.c new file mode 100644 index 0000000000..9583ec3df5 --- /dev/null +++ b/test/zdtm/lib/sysctl.c @@ -0,0 +1,59 @@ +#include + +#include "zdtmtst.h" +#include "sysctl.h" + +int sysctl_read_int(const char *name, int *data) +{ + int fd; + int ret; + char buf[16]; + + fd = open(name, O_RDONLY); + if (fd < 0) { + pr_perror("Can't open %s", name); + return fd; + } + + ret = read(fd, buf, sizeof(buf) - 1); + if (ret < 0) { + pr_perror("Can't read %s", name); + ret = -errno; + goto err; + } + + buf[ret] = '\0'; + + *data = (int)strtoul(buf, NULL, 10); + ret = 0; +err: + close(fd); + return ret; +} + +int sysctl_write_int(const char *name, int val) +{ + int fd; + int ret; + char buf[16]; + + fd = open(name, O_WRONLY); + if (fd < 0) { + pr_perror("Can't open %s", name); + return fd; + } + + sprintf(buf, "%d\n", val); + + ret = write(fd, buf, strlen(buf)); + if (ret < 0) { + pr_perror("Can't write %d into %s", val, name); + 
ret = -errno; + goto err; + } + + ret = 0; +err: + close(fd); + return ret; +} diff --git a/test/zdtm/lib/sysctl.h b/test/zdtm/lib/sysctl.h new file mode 100644 index 0000000000..67129102fe --- /dev/null +++ b/test/zdtm/lib/sysctl.h @@ -0,0 +1,7 @@ +#ifndef __ZDTM_SYSCTL__ +#define __ZDTM_SYSCTL__ + +extern int sysctl_read_int(const char *name, int *data); +extern int sysctl_write_int(const char *name, int val); + +#endif diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index 36d00ca5c8..f9d2efe746 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -207,6 +207,7 @@ TST_NOFILE := \ pipe03 \ netns_sub \ netns_sub_veth \ + netns_sub_sysctl \ unlink_multiple_largefiles \ config_inotify_irmap \ thp_disable \ diff --git a/test/zdtm/static/netns_sub_sysctl.c b/test/zdtm/static/netns_sub_sysctl.c new file mode 100644 index 0000000000..bf828e08e2 --- /dev/null +++ b/test/zdtm/static/netns_sub_sysctl.c @@ -0,0 +1,56 @@ +#include + +#include "zdtmtst.h" +#include "sysctl.h" + +const char *test_doc = "Check dump and restore a net.unix.max_dgram_qlen sysctl parameter in subns"; +const char *test_author = "Alexander Mikhalitsyn "; + +typedef struct { + const char *path; + int old; + int new; +} sysctl_opt_t; + +#define CONF_UNIX_BASE "/proc/sys/net/unix" + +static sysctl_opt_t net_unix_params[] = { + {CONF_UNIX_BASE"/max_dgram_qlen", 0, 0}, + {NULL, 0, 0} +}; + +int main(int argc, char **argv) +{ + int ret = 0; + sysctl_opt_t *p; + test_init(argc, argv); + + for (p = net_unix_params; p->path != NULL; p++) { + p->old = (((unsigned)lrand48()) % 1023) + 1; + if (sysctl_write_int(p->path, p->old)) { + pr_perror("Can't change %s", p->path); + return -1; + } + } + + test_daemon(); + test_waitsig(); + + for (p = net_unix_params; p->path != NULL; p++) { + if (sysctl_read_int(p->path, &p->new)) + ret = 1; + + if (p->old != p->new) { + errno = EINVAL; + pr_perror("%s changed: %d ---> %d", p->path, p->old, p->new); + ret = 1; + } + } + + if (ret) + 
fail(); + else + pass(); + + return ret; +} diff --git a/test/zdtm/static/netns_sub_sysctl.desc b/test/zdtm/static/netns_sub_sysctl.desc new file mode 100644 index 0000000000..5358426683 --- /dev/null +++ b/test/zdtm/static/netns_sub_sysctl.desc @@ -0,0 +1,4 @@ +{ + 'flavor': 'ns', + 'flags': 'suid' +} From 08766ef82a9f25033b8ec25da7d7ac67ab0da18c Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Mon, 16 Dec 2019 15:34:10 +0300 Subject: [PATCH 168/277] x86/cpu: cleanup and improve xfeatures_mask check Make xfeatures_mask check explicit. We were relying on our guess about hardware "backward compatibility" and used ">" check here for a long time. But it looks better to explicitly check that all xfeature bits available on the source are also available on the destination. For xsave_size we need to have smaller size on destination than on source, because xsave operation on small allocated buffer may corrupt the nearby data. So split up comments about xfeatures_mask and xsave_size, as having a single comment for quite different cases is less understandable. v2: improve comments, remove extra else-ifs, remove extra typecast Signed-off-by: Pavel Tikhomirov --- criu/arch/x86/cpu.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/criu/arch/x86/cpu.c b/criu/arch/x86/cpu.c index 3808b9d33b..72c5bd59ca 100644 --- a/criu/arch/x86/cpu.c +++ b/criu/arch/x86/cpu.c @@ -236,6 +236,7 @@ static int cpu_validate_features(compel_cpuinfo_t *cpu_info) return -1; if (opts.cpu_cap & CPU_CAP_FPU) { + uint64_t m; /* * If we're requested to check FPU only ignore * any other bit. It's up to a user if the @@ -261,24 +262,33 @@ static int cpu_validate_features(compel_cpuinfo_t *cpu_info) #undef __mismatch_fpu_bit /* - * Make sure the xsave features are compatible.
We already hit the - * issue with libc where we've checkpointed the container on old - * machine but restored on more modern one and libc fetched new - * xsave frame size directly by xsave instruction with greedy - * feature mask causing programs to misbehave. + * Make sure the xsave features are compatible. Check that on + * the destination there are all the features which were on the + * source. */ - if (cpu_info->xfeatures_mask > rt_cpu_info.xfeatures_mask) { - uint64_t m = cpu_info->xfeatures_mask & ~rt_cpu_info.xfeatures_mask; - pr_err("CPU xfeatures has unsupported bits (%#llx)\n", - (unsigned long long)m); + if ((m = cpu_info->xfeatures_mask & + ~rt_cpu_info.xfeatures_mask)) { + pr_err("CPU xfeatures has unsupported bits (%#" + PRIx64")\n", m); return -1; - } else if (cpu_info->xsave_size != rt_cpu_info.xsave_size) { + } + + /* + * Make sure the xsave sizes are compatible. We already hit the + * issue with libc where we've checkpointed the container on + * old machine but restored on more modern one and libc fetched + * new xsave frame size directly by xsave instruction with + * greedy feature mask causing programs to misbehave. 
+ */ + if (cpu_info->xsave_size != rt_cpu_info.xsave_size) { pr_err("CPU xsave size mismatch (%u/%u)\n", cpu_info->xsave_size, rt_cpu_info.xsave_size); return -1; - } else if (cpu_info->xsave_size_max != rt_cpu_info.xsave_size_max) { + } + if (cpu_info->xsave_size_max != rt_cpu_info.xsave_size_max) { pr_err("CPU xsave max size mismatch (%u/%u)\n", - cpu_info->xsave_size_max, rt_cpu_info.xsave_size_max); + cpu_info->xsave_size_max, + rt_cpu_info.xsave_size_max); return -1; } } From 51c1445588239332d841e45f18720606c6839ff0 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Wed, 18 Dec 2019 23:32:32 +0000 Subject: [PATCH 169/277] crit: fix python3 encoding issues Signed-off-by: Nicolas Viennot --- lib/py/images/images.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/py/images/images.py b/lib/py/images/images.py index f4517d8459..3eedfca69d 100644 --- a/lib/py/images/images.py +++ b/lib/py/images/images.py @@ -244,7 +244,7 @@ def load(self, f, pretty=False, no_payload=False): while True: gc = pb.ghost_chunk_entry() buf = f.read(4) - if buf == '': + if len(buf) == 0: break size, = struct.unpack('i', buf) gc.ParseFromString(f.read(size)) @@ -252,13 +252,13 @@ def load(self, f, pretty=False, no_payload=False): if no_payload: f.seek(gc.len, os.SEEK_CUR) else: - entry['extra'] = base64.encodebytes(f.read(gc.len)) + entry['extra'] = base64.encodebytes(f.read(gc.len)).decode('utf-8') entries.append(entry) else: if no_payload: f.seek(0, os.SEEK_END) else: - g_entry['extra'] = base64.encodebytes(f.read()) + g_entry['extra'] = base64.encodebytes(f.read()).decode('utf-8') entries.append(g_entry) return entries From 65cd7a6d935a95f003c88958e51a8d09068887e9 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Fri, 20 Dec 2019 18:09:15 +0000 Subject: [PATCH 170/277] scripts: alpine: Install py2 packages with pip The py-future package has been renamed to py3-future [1] and py2 package for yaml has been dropped [2]. 
[1] https://git.alpinelinux.org/aports/commit/main?id=316d44abaed13964e97eb43c095cd1b64e3943ad [2] https://git.alpinelinux.org/aports/commit/main?id=e369c1fd7707a73f2c3e2b11b613198d9a4106de Signed-off-by: Radostin Stoyanov --- scripts/build/Dockerfile.alpine | 4 +--- scripts/build/Dockerfile.openj9-alpine | 3 --- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/scripts/build/Dockerfile.alpine b/scripts/build/Dockerfile.alpine index 70fdf480aa..a1d1d91916 100644 --- a/scripts/build/Dockerfile.alpine +++ b/scripts/build/Dockerfile.alpine @@ -27,9 +27,7 @@ RUN mv .ccache /tmp && make mrproper && ccache -sz && \ date && make -j $(nproc) CC="$CC" && date && ccache -s RUN apk add \ - py-yaml \ py-pip \ - py2-future \ ip6tables \ iptables \ iproute2 \ @@ -42,5 +40,5 @@ RUN apk add \ # The rpc test cases are running as user #1000, let's add the user RUN adduser -u 1000 -D test -RUN pip install protobuf ipaddress junit_xml flake8 +RUN pip install PyYAML future protobuf ipaddress junit_xml flake8 RUN make -C test/zdtm diff --git a/scripts/build/Dockerfile.openj9-alpine b/scripts/build/Dockerfile.openj9-alpine index 654e7bf317..43a9934446 100644 --- a/scripts/build/Dockerfile.openj9-alpine +++ b/scripts/build/Dockerfile.openj9-alpine @@ -17,9 +17,6 @@ RUN apk update && apk add \ python \ sudo \ maven \ - py-yaml \ - py-pip \ - py2-future \ ip6tables \ iptables \ bash From 210166baeb0417a19ed76e63743a93387ac9a886 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Thu, 12 Dec 2019 23:04:30 +0000 Subject: [PATCH 171/277] sockets: Remove duplicate variable assignment Signed-off-by: Radostin Stoyanov --- criu/sockets.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/criu/sockets.c b/criu/sockets.c index 312b55c6dc..80f3153ba1 100644 --- a/criu/sockets.c +++ b/criu/sockets.c @@ -524,7 +524,7 @@ int restore_prepare_socket(int sk) int restore_socket_opts(int sk, SkOptsEntry *soe) { - int ret = 0, val; + int ret = 0, val = 1; struct timeval tv; 
/* In kernel a bufsize value is doubled. */ u32 bufs[2] = { soe->so_sndbuf / 2, soe->so_rcvbuf / 2}; @@ -547,27 +547,22 @@ int restore_socket_opts(int sk, SkOptsEntry *soe) ret |= restore_opt(sk, SOL_SOCKET, SO_MARK, &soe->so_mark); } if (soe->has_so_passcred && soe->so_passcred) { - val = 1; pr_debug("\tset passcred for socket\n"); ret |= restore_opt(sk, SOL_SOCKET, SO_PASSCRED, &val); } if (soe->has_so_passsec && soe->so_passsec) { - val = 1; pr_debug("\tset passsec for socket\n"); ret |= restore_opt(sk, SOL_SOCKET, SO_PASSSEC, &val); } if (soe->has_so_dontroute && soe->so_dontroute) { - val = 1; pr_debug("\tset dontroute for socket\n"); ret |= restore_opt(sk, SOL_SOCKET, SO_DONTROUTE, &val); } if (soe->has_so_no_check && soe->so_no_check) { - val = 1; pr_debug("\tset no_check for socket\n"); ret |= restore_opt(sk, SOL_SOCKET, SO_NO_CHECK, &val); } if (soe->has_so_broadcast && soe->so_broadcast) { - val = 1; pr_debug("\tset broadcast for socket\n"); ret |= restore_opt(sk, SOL_SOCKET, SO_BROADCAST, &val); } From 5dc882709ddbb52c4c7d35959d1026bc0a06bd64 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Fri, 13 Dec 2019 00:10:28 +0000 Subject: [PATCH 172/277] socket: c/r support for SO_KEEPALIVE TCP keepalive packets can be used to determine if a connection is still valid. When the SO_KEEPALIVE option is set, TCP packets are periodically sent to keep the connection alive. This patch implements checkpoint/restore support for SO_KEEPALIVE, TCP_KEEPIDLE, TCP_KEEPINTVL and TCP_KEEPCNT options. 
Signed-off-by: Radostin Stoyanov --- criu/include/sk-inet.h | 2 +- criu/sk-inet.c | 6 +++++- criu/sk-tcp.c | 20 +++++++++++++++++++- criu/sockets.c | 20 ++++++++++++++++++++ images/sk-opts.proto | 4 ++++ 5 files changed, 49 insertions(+), 3 deletions(-) diff --git a/criu/include/sk-inet.h b/criu/include/sk-inet.h index 79966517bf..dec67ca6c0 100644 --- a/criu/include/sk-inet.h +++ b/criu/include/sk-inet.h @@ -83,7 +83,7 @@ extern void tcp_locked_conn_add(struct inet_sk_info *); extern void rst_unlock_tcp_connections(void); extern void cpt_unlock_tcp_connections(void); -extern int dump_one_tcp(int sk, struct inet_sk_desc *sd); +extern int dump_one_tcp(int sk, struct inet_sk_desc *sd, SkOptsEntry *soe); extern int restore_one_tcp(int sk, struct inet_sk_info *si); #define SK_EST_PARAM "tcp-established" diff --git a/criu/sk-inet.c b/criu/sk-inet.c index f9c64c7af5..3425485851 100644 --- a/criu/sk-inet.c +++ b/criu/sk-inet.c @@ -551,7 +551,7 @@ static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int fa switch (proto) { case IPPROTO_TCP: - err = (type != SOCK_RAW) ? dump_one_tcp(lfd, sk) : 0; + err = (type != SOCK_RAW) ? 
dump_one_tcp(lfd, sk, &skopts) : 0; break; case IPPROTO_UDP: case IPPROTO_UDPLITE: @@ -747,6 +747,10 @@ static int post_open_inet_sk(struct file_desc *d, int sk) if (!val && restore_opt(sk, SOL_SOCKET, SO_BROADCAST, &val)) return -1; + val = ii->ie->opts->so_keepalive; + if (!val && restore_opt(sk, SOL_SOCKET, SO_KEEPALIVE, &val)) + return -1; + return 0; } diff --git a/criu/sk-tcp.c b/criu/sk-tcp.c index 4fd2eb8e60..7ee6038186 100644 --- a/criu/sk-tcp.c +++ b/criu/sk-tcp.c @@ -218,8 +218,26 @@ static int dump_tcp_conn_state(struct inet_sk_desc *sk) return ret; } -int dump_one_tcp(int fd, struct inet_sk_desc *sk) +int dump_one_tcp(int fd, struct inet_sk_desc *sk, SkOptsEntry *soe) { + soe->has_tcp_keepcnt = true; + if (dump_opt(fd, SOL_TCP, TCP_KEEPCNT, &soe->tcp_keepcnt)) { + pr_perror("Can't read TCP_KEEPCNT"); + return -1; + } + + soe->has_tcp_keepidle = true; + if (dump_opt(fd, SOL_TCP, TCP_KEEPIDLE, &soe->tcp_keepidle)) { + pr_perror("Can't read TCP_KEEPIDLE"); + return -1; + } + + soe->has_tcp_keepintvl = true; + if (dump_opt(fd, SOL_TCP, TCP_KEEPINTVL, &soe->tcp_keepintvl)) { + pr_perror("Can't read TCP_KEEPINTVL"); + return -1; + } + if (sk->dst_port == 0) return 0; diff --git a/criu/sockets.c b/criu/sockets.c index 80f3153ba1..2e1ce9d7bc 100644 --- a/criu/sockets.c +++ b/criu/sockets.c @@ -566,6 +566,22 @@ int restore_socket_opts(int sk, SkOptsEntry *soe) pr_debug("\tset broadcast for socket\n"); ret |= restore_opt(sk, SOL_SOCKET, SO_BROADCAST, &val); } + if (soe->has_so_keepalive && soe->so_keepalive) { + pr_debug("\tset keepalive for socket\n"); + ret |= restore_opt(sk, SOL_SOCKET, SO_KEEPALIVE, &val); + } + if (soe->has_tcp_keepcnt) { + pr_debug("\tset keepcnt for socket\n"); + ret |= restore_opt(sk, SOL_TCP, TCP_KEEPCNT, &soe->tcp_keepcnt); + } + if (soe->has_tcp_keepidle) { + pr_debug("\tset keepidle for socket\n"); + ret |= restore_opt(sk, SOL_TCP, TCP_KEEPIDLE, &soe->tcp_keepidle); + } + if (soe->has_tcp_keepintvl) { + pr_debug("\tset keepintvl for 
socket\n"); + ret |= restore_opt(sk, SOL_TCP, TCP_KEEPINTVL, &soe->tcp_keepintvl); + } tv.tv_sec = soe->so_snd_tmo_sec; tv.tv_usec = soe->so_snd_tmo_usec; @@ -651,6 +667,10 @@ int dump_socket_opts(int sk, SkOptsEntry *soe) soe->has_so_broadcast = true; soe->so_broadcast = val ? true : false; + ret |= dump_opt(sk, SOL_SOCKET, SO_KEEPALIVE, &val); + soe->has_so_keepalive = true; + soe->so_keepalive = val ? true : false; + ret |= dump_bound_dev(sk, soe); ret |= dump_socket_filter(sk, soe); diff --git a/images/sk-opts.proto b/images/sk-opts.proto index c93ec5fd5c..336cca22ab 100644 --- a/images/sk-opts.proto +++ b/images/sk-opts.proto @@ -23,6 +23,10 @@ message sk_opts_entry { repeated fixed64 so_filter = 16; optional bool so_reuseport = 17; optional bool so_broadcast = 18; + optional bool so_keepalive = 19; + optional uint32 tcp_keepcnt = 20; + optional uint32 tcp_keepidle = 21; + optional uint32 tcp_keepintvl = 22; } enum sk_shutdown { From c8e96378267473627d841bdbd1df64cf980f3a69 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Fri, 13 Dec 2019 04:01:36 +0000 Subject: [PATCH 173/277] zdtm: Add test for SO_KEEPALIVE Signed-off-by: Radostin Stoyanov --- test/zdtm/static/Makefile | 3 +- test/zdtm/static/socket-tcp-keepalive.c | 97 +++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 1 deletion(-) create mode 100644 test/zdtm/static/socket-tcp-keepalive.c diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index f9d2efe746..ea5d3c42e6 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -105,7 +105,8 @@ TST_NOFILE := \ socket-tcp-unconn \ socket-tcp6-unconn \ socket-tcp-syn-sent \ - socket-tcp-skip-in-flight \ + socket-tcp-skip-in-flight \ + socket-tcp-keepalive \ sock_opts00 \ sock_opts01 \ sk-unix-unconn \ diff --git a/test/zdtm/static/socket-tcp-keepalive.c b/test/zdtm/static/socket-tcp-keepalive.c new file mode 100644 index 0000000000..a977a03b53 --- /dev/null +++ b/test/zdtm/static/socket-tcp-keepalive.c @@ -0,0 
+1,97 @@ +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "test checkpoint/restore of SO_KEEPALIVE\n"; +const char *test_author = "Radostin Stoyanov \n"; + +int main(int argc, char **argv) +{ + int sk; + int alive = 1; + int cnt = 5; + int idle = 10; + int intvl = 15; + int optval; + socklen_t optlen; + + test_init(argc, argv); + + sk = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) { + pr_perror("Can't create socket"); + return 1; + } + + /* Set the option active */ + if (setsockopt(sk, SOL_SOCKET, SO_KEEPALIVE, &alive, sizeof(alive)) < 0) { + pr_perror("setsockopt SO_KEEPALIVE"); + return 1; + } + + if (setsockopt(sk, SOL_TCP, TCP_KEEPCNT, &cnt, sizeof(cnt)) < 0) { + pr_perror("setsockopt TCP_KEEPCNT"); + return 1; + } + + if (setsockopt(sk, SOL_TCP, TCP_KEEPIDLE, &idle, sizeof(idle)) < 0) { + pr_perror("setsockopt TCP_KEEPIDLE"); + return 1; + } + + optval = 5; + optlen = sizeof(optval); + if (setsockopt(sk, SOL_TCP, TCP_KEEPINTVL, &intvl, sizeof(intvl)) < 0) { + pr_perror("setsockopt TCP_KEEPINTVL"); + return 1; + } + + test_daemon(); + test_waitsig(); + + if (getsockopt(sk, SOL_SOCKET, SO_KEEPALIVE, &optval, &optlen)) { + pr_perror("getsockopt SO_KEEPALIVE"); + return 1; + } + + if (optlen != sizeof(optval) || optval != alive) { + fail("SO_KEEPALIVE not set"); + return 1; + } + + if (getsockopt(sk, SOL_TCP, TCP_KEEPCNT, &optval, &optlen) < 0) { + pr_perror("getsockopt TCP_KEEPCNT"); + return 1; + } + + if (optval != cnt) { + fail("TCP_KEEPCNT has incorrect value (%d != %d)", cnt, optval); + return 1; + } + + if (getsockopt(sk, SOL_TCP, TCP_KEEPIDLE, &optval, &optlen) < 0) { + pr_perror("getsockopt TCP_KEEPIDLE"); + return 1; + } + + if (optval != idle) { + fail("TCP_KEEPIDLE has incorrect value (%d != %d)", idle, optval); + return 1; + } + + if (getsockopt(sk, SOL_TCP, TCP_KEEPINTVL, &optval, &optlen) < 0) { + pr_perror("getsockopt TCP_KEEPINTVL"); + return 1; + } + + if (optval != intvl) { + 
fail("TCP_KEEPINTVL has incorrect value (%d != %d)", intvl, optval); + return 1; + } + + pass(); + return 0; +} \ No newline at end of file From d5a1c5455115e43bfe687fe0ab176f76f41b7926 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Fri, 20 Dec 2019 17:50:37 +0100 Subject: [PATCH 174/277] Fix tests on Ubuntu It seems like Ubuntu introduced a overlayfs change which breaks CRIU: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1857257 This disables overlayfs (unfortunately) in most tests by switching to devicemapper or vfs. Upstream kernels do not seem to have this problem. This also adds the 'docker-test' for xenial which still has a working overlayfs from CRIU's point of view. Also adjust Podman Ubuntu package location Podman Ubuntu packages are now available via OBS and no longer via PPA. Signed-off-by: Adrian Reber --- .travis.yml | 13 +++++++++++-- scripts/travis/Makefile | 5 ++++- scripts/travis/docker-test.sh | 15 ++++++++++----- scripts/travis/podman-test.sh | 13 ++++++++++--- 4 files changed, 35 insertions(+), 11 deletions(-) diff --git a/.travis.yml b/.travis.yml index e6e4101911..25dd6a29b3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,7 +11,6 @@ env: - TR_ARCH=local CLANG=1 COMPAT_TEST=y - TR_ARCH=x86_64 - TR_ARCH=x86_64 CLANG=1 - - TR_ARCH=docker-test - TR_ARCH=openj9-test matrix: include: @@ -57,6 +56,16 @@ matrix: arch: amd64 env: TR_ARCH=podman-test dist: bionic + - os: linux + arch: amd64 + env: TR_ARCH=docker-test + dist: bionic + - os: linux + arch: amd64 + env: TR_ARCH=docker-test DIST=xenial + # On xenial it should be possible to test overlayfs; + # broken on the latest bionic kernel + dist: xenial - os: linux arch: amd64 env: TR_ARCH=alpine CLANG=1 @@ -79,9 +88,9 @@ matrix: dist: bionic allow_failures: - env: TR_ARCH=docker-test + - env: TR_ARCH=docker-test DIST=xenial - env: TR_ARCH=fedora-rawhide - env: TR_ARCH=local GCOV=1 - - env: TR_ARCH=podman-test script: - sudo make CCACHE=1 -C scripts/travis $TR_ARCH after_success: diff --git 
a/scripts/travis/Makefile b/scripts/travis/Makefile index 3731711490..17abb703a8 100644 --- a/scripts/travis/Makefile +++ b/scripts/travis/Makefile @@ -58,7 +58,10 @@ docker-test: podman-test: ./podman-test.sh -openj9-test: +# overlayfs behaves differently on Ubuntu and breaks CRIU +# https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1857257 +# Switch to devicemapper +openj9-test: restart-docker ./openj9-test.sh %: diff --git a/scripts/travis/docker-test.sh b/scripts/travis/docker-test.sh index ee96fef48d..ac420a4450 100755 --- a/scripts/travis/docker-test.sh +++ b/scripts/travis/docker-test.sh @@ -19,11 +19,16 @@ apt-get update -qq apt-get install -qq docker-ce -cat > /etc/docker/daemon.json < /etc/docker/daemon.json +else + echo '{ "experimental": true }' > /etc/docker/daemon.json +fi service docker restart diff --git a/scripts/travis/podman-test.sh b/scripts/travis/podman-test.sh index eafdc73bee..5189477cd6 100755 --- a/scripts/travis/podman-test.sh +++ b/scripts/travis/podman-test.sh @@ -1,7 +1,13 @@ #!/bin/bash set -x -e -o pipefail -add-apt-repository -y ppa:projectatomic/ppa +echo 'deb http://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/xUbuntu_18.04/ /' > /etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list + +wget -nv https://download.opensuse.org/repositories/devel:kubic:libcontainers:stable/xUbuntu_18.04/Release.key -O- | apt-key add - + +# podman conflicts with a man page from docker-ce +# this is a podman packaging bug (https://github.com/containers/libpod/issues/4747) +apt-get -y purge docker-ce apt-get install -qq \ apt-transport-https \ @@ -10,7 +16,6 @@ apt-get install -qq \ software-properties-common apt-get update -qq - apt-get install -qqy podman containernetworking-plugins export SKIP_TRAVIS_TEST=1 @@ -21,7 +26,9 @@ cd ../../ make install -podman info +# overlaysfs behaves differently on Ubuntu and breaks CRIU +# https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1857257 +podman --storage-driver vfs info 
criu --version From ee96f50abb743bfbeb79563fb92c541ab13301f5 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Sat, 21 Dec 2019 18:08:23 +0000 Subject: [PATCH 175/277] style: Enforce kernel style -Wdeclaration-after-statement Include warnings that the kernel uses during compilation: -Wdeclaration-after-statement: enforces having variables declared at the top of scopes Signed-off-by: Nicolas Viennot [Generated a commit message from the pull request] Signed-off-by: Dmitry Safonov --- Makefile | 2 +- criu/img-remote.c | 12 +++++++---- criu/net.c | 7 +++++-- criu/page-xfer.c | 10 +++++----- criu/pie/util-vdso.c | 3 ++- test/zdtm/Makefile.inc | 1 + test/zdtm/static/arm-neon00.c | 11 +++++----- test/zdtm/static/child_subreaper.c | 6 +++--- test/zdtm/static/config_inotify_irmap.c | 3 ++- test/zdtm/static/inotify00.c | 3 ++- test/zdtm/static/maps03.c | 3 ++- test/zdtm/static/mnt_ext_dev.c | 3 ++- test/zdtm/static/mntns_link_remap.c | 2 +- test/zdtm/static/mntns_open.c | 2 +- test/zdtm/static/mountpoints.c | 2 +- test/zdtm/static/remap_dead_pid.c | 4 ++-- test/zdtm/static/selinux01.c | 3 ++- test/zdtm/static/sigaltstack.c | 20 +++++++++---------- test/zdtm/static/socket-tcp-syn-sent.c | 4 ++-- test/zdtm/static/unlink_multiple_largefiles.c | 3 ++- test/zdtm/transition/file_aio.c | 3 ++- test/zdtm/transition/file_read.c | 5 ++++- test/zdtm/transition/maps008.c | 14 +++++++------ 23 files changed, 74 insertions(+), 52 deletions(-) diff --git a/Makefile b/Makefile index ef76d706c2..133390f17c 100644 --- a/Makefile +++ b/Makefile @@ -100,7 +100,7 @@ export PROTOUFIX DEFINES DEFINES += -D_FILE_OFFSET_BITS=64 DEFINES += -D_GNU_SOURCE -WARNINGS := -Wall -Wformat-security +WARNINGS := -Wall -Wformat-security -Wdeclaration-after-statement CFLAGS-GCOV := --coverage -fno-exceptions -fno-inline -fprofile-update=atomic export CFLAGS-GCOV diff --git a/criu/img-remote.c b/criu/img-remote.c index 433c012ab7..47c23faf4d 100644 --- a/criu/img-remote.c +++ b/criu/img-remote.c @@ -979,9 
+979,10 @@ int write_remote_image_connection(char *snapshot_id, char *path, int flags) int finish_remote_dump(void) { + int fd; pr_info("Dump side is calling finish\n"); - int fd = write_remote_image_connection(NULL_SNAPSHOT_ID, FINISH, O_WRONLY); + fd = write_remote_image_connection(NULL_SNAPSHOT_ID, FINISH, O_WRONLY); if (fd == -1) { pr_err("Unable to open finish dump connection"); return -1; @@ -993,9 +994,10 @@ int finish_remote_dump(void) int finish_remote_restore(void) { + int fd; pr_info("Restore side is calling finish\n"); - int fd = read_remote_image_connection(NULL_SNAPSHOT_ID, FINISH); + fd = read_remote_image_connection(NULL_SNAPSHOT_ID, FINISH); if (fd == -1) { pr_err("Unable to open finish restore connection\n"); return -1; @@ -1078,10 +1080,12 @@ static int pull_snapshot_ids(void) int push_snapshot_id(void) { int n; - restoring = false; SnapshotIdEntry rn = SNAPSHOT_ID_ENTRY__INIT; - int sockfd = write_remote_image_connection(NULL_SNAPSHOT_ID, PARENT_IMG, O_APPEND); + int sockfd; + + restoring = false; + sockfd = write_remote_image_connection(NULL_SNAPSHOT_ID, PARENT_IMG, O_APPEND); if (sockfd < 0) { pr_err("Unable to open snapshot id push connection\n"); return -1; diff --git a/criu/net.c b/criu/net.c index 2e70d81615..8275fc6d3f 100644 --- a/criu/net.c +++ b/criu/net.c @@ -2846,6 +2846,9 @@ int macvlan_ext_add(struct external *ext) static int prep_ns_sockets(struct ns_id *ns, bool for_dump) { int nsret = -1, ret; +#ifdef CONFIG_HAS_SELINUX + security_context_t ctx; +#endif if (ns->type != NS_CRIU) { pr_info("Switching to %d's net for collecting sockets\n", ns->ns_pid); @@ -2883,7 +2886,6 @@ static int prep_ns_sockets(struct ns_id *ns, bool for_dump) * policies installed. For Fedora based systems this is part * of the container-selinux package. 
*/ - security_context_t ctx; /* * This assumes that all processes CRIU wants to dump are labeled @@ -3323,6 +3325,7 @@ int kerndat_link_nsid() } if (pid == 0) { + bool has_link_nsid; NetDeviceEntry nde = NET_DEVICE_ENTRY__INIT; struct net_link link = { .created = false, @@ -3365,7 +3368,7 @@ int kerndat_link_nsid() exit(1); } - bool has_link_nsid = false; + has_link_nsid = false; if (check_link_nsid(sk, &has_link_nsid)) exit(1); diff --git a/criu/page-xfer.c b/criu/page-xfer.c index ff7c620bc9..c9b4f2fbc1 100644 --- a/criu/page-xfer.c +++ b/criu/page-xfer.c @@ -643,17 +643,17 @@ unsigned long handle_faulty_iov(int pid, struct iovec* riov, unsigned long* aux_len, unsigned long partial_read_bytes) { + struct iovec dummy; + ssize_t bytes_read; + unsigned long offset = 0; + unsigned long final_read_cnt = 0; + /* Handling Case 2*/ if (riov[faulty_index].iov_len == PAGE_SIZE) { cnt_sub(CNT_PAGES_WRITTEN, 1); return 0; } - struct iovec dummy; - ssize_t bytes_read; - unsigned long offset = 0; - unsigned long final_read_cnt = 0; - /* Handling Case 3-Part 3.2*/ offset = (partial_read_bytes)? 
partial_read_bytes : PAGE_SIZE; diff --git a/criu/pie/util-vdso.c b/criu/pie/util-vdso.c index 104da06332..58b27680c8 100644 --- a/criu/pie/util-vdso.c +++ b/criu/pie/util-vdso.c @@ -243,10 +243,11 @@ static void parse_elf_symbols(uintptr_t mem, size_t size, Phdr_t *load, k = elf_hash((const unsigned char *)symbol); for (j = bucket[k % nbucket]; j < nchain && j != STN_UNDEF; j = chain[j]) { - addr = mem + dyn_symtab->d_un.d_ptr - load->p_vaddr; Sym_t *sym; char *name; + addr = mem + dyn_symtab->d_un.d_ptr - load->p_vaddr; + addr += sizeof(Sym_t)*j; if (__ptr_struct_oob(addr, sizeof(Sym_t), mem, size)) continue; diff --git a/test/zdtm/Makefile.inc b/test/zdtm/Makefile.inc index 32fc72d320..6958d128e3 100644 --- a/test/zdtm/Makefile.inc +++ b/test/zdtm/Makefile.inc @@ -38,6 +38,7 @@ ifeq ($(origin CC), default) CC := $(CROSS_COMPILE)$(HOSTCC) endif CFLAGS += -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0 +CFLAGS += -Wdeclaration-after-statement CFLAGS += $(USERCFLAGS) CFLAGS += -D_GNU_SOURCE CPPFLAGS += -iquote $(LIBDIR)/arch/$(ARCH)/include diff --git a/test/zdtm/static/arm-neon00.c b/test/zdtm/static/arm-neon00.c index 96da16c6b0..ce8123e515 100644 --- a/test/zdtm/static/arm-neon00.c +++ b/test/zdtm/static/arm-neon00.c @@ -12,13 +12,14 @@ const char *test_author = "Alexander Karatshov "; int main(int argc, char ** argv) { + int a, b, c, y1, y2; + srand(time(0)); - int a = rand() % 100; - int b = rand() % 100; - int c = rand() % 100; - int y1 = a + b*c; - int y2; + a = rand() % 100; + b = rand() % 100; + c = rand() % 100; + y1 = a + b*c; test_init(argc, argv); diff --git a/test/zdtm/static/child_subreaper.c b/test/zdtm/static/child_subreaper.c index 267795249b..6d02c9f933 100644 --- a/test/zdtm/static/child_subreaper.c +++ b/test/zdtm/static/child_subreaper.c @@ -8,10 +8,11 @@ const char *test_author = "MichaÅ‚ CÅ‚apiÅ„ski "; int main(int argc, char **argv) { + int cs_before = 1, cs_after, ret; + test_init(argc, argv); - int cs_before = 1; - int ret = 
prctl(PR_SET_CHILD_SUBREAPER, cs_before, 0, 0, 0); + ret = prctl(PR_SET_CHILD_SUBREAPER, cs_before, 0, 0, 0); if (ret) { pr_perror("Can't set child subreaper attribute, err = %d", ret); exit(1); @@ -20,7 +21,6 @@ int main(int argc, char **argv) test_daemon(); test_waitsig(); - int cs_after; ret = prctl(PR_GET_CHILD_SUBREAPER, (unsigned long)&cs_after, 0, 0, 0); if (ret) { pr_perror("Can't get child subreaper attribute, err = %d", ret); diff --git a/test/zdtm/static/config_inotify_irmap.c b/test/zdtm/static/config_inotify_irmap.c index 831dc19741..3cbeba7d38 100644 --- a/test/zdtm/static/config_inotify_irmap.c +++ b/test/zdtm/static/config_inotify_irmap.c @@ -31,6 +31,7 @@ char test_files[2][128] = {TDIR"/zdtm-test", TDIR"/zdtm-test1",}; int main (int argc, char *argv[]) { + FILE *configfile; char buf[BUFF_SIZE]; int fd, wd, i; @@ -56,7 +57,7 @@ int main (int argc, char *argv[]) } } - FILE *configfile = fopen(CONFIG_PATH, "w"); + configfile = fopen(CONFIG_PATH, "w"); if (configfile == NULL) { pr_perror("Unable to create configuration file %s", CONFIG_PATH); goto err; diff --git a/test/zdtm/static/inotify00.c b/test/zdtm/static/inotify00.c index 67088edd8f..635c050471 100644 --- a/test/zdtm/static/inotify00.c +++ b/test/zdtm/static/inotify00.c @@ -125,9 +125,10 @@ int main (int argc, char *argv[]) { pid_t pid; task_waiter_t t; - task_waiter_init(&t); static char buf[PATH_MAX]; + task_waiter_init(&t); + if (mount(NULL, "/", NULL, MS_PRIVATE | MS_REC, NULL)) { pr_perror("Unable to remount /"); return 1; diff --git a/test/zdtm/static/maps03.c b/test/zdtm/static/maps03.c index f2bf7957a3..0e0a5b8f25 100644 --- a/test/zdtm/static/maps03.c +++ b/test/zdtm/static/maps03.c @@ -16,9 +16,10 @@ const char *test_author = "Cyrill Gorcunov "; int main(int argc, char **argv) { - test_init(argc, argv); unsigned char *mem; + test_init(argc, argv); + test_msg("Alloc huge VMA\n"); mem = (void *)mmap(NULL, (10L << 30), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); diff 
--git a/test/zdtm/static/mnt_ext_dev.c b/test/zdtm/static/mnt_ext_dev.c index a9ac01333b..1d60fc92fe 100644 --- a/test/zdtm/static/mnt_ext_dev.c +++ b/test/zdtm/static/mnt_ext_dev.c @@ -20,10 +20,11 @@ TEST_OPTION(dirname, string, "directory name", 1); int main(int argc, char **argv) { char *loop, fd, dfd, fd2; - test_init(argc, argv); struct stat st, stp, st2; char dname[PATH_MAX], dname2[PATH_MAX]; + test_init(argc, argv); + snprintf(dname, sizeof(dname), "%s/test_dir", dirname); snprintf(dname2, sizeof(dname2), "%s/test_dir2", dirname); diff --git a/test/zdtm/static/mntns_link_remap.c b/test/zdtm/static/mntns_link_remap.c index 642641b161..6ac08191ab 100644 --- a/test/zdtm/static/mntns_link_remap.c +++ b/test/zdtm/static/mntns_link_remap.c @@ -230,8 +230,8 @@ int main(int argc, char **argv) if (pid > 0) { - kill(pid, SIGTERM); int status = 1; + kill(pid, SIGTERM); wait(&status); if (WIFEXITED(status)) { if (WEXITSTATUS(status) == AWK_OK) diff --git a/test/zdtm/static/mntns_open.c b/test/zdtm/static/mntns_open.c index e19c4ea72d..c687080a78 100644 --- a/test/zdtm/static/mntns_open.c +++ b/test/zdtm/static/mntns_open.c @@ -119,8 +119,8 @@ int main(int argc, char **argv) test_waitsig(); if (pid > 0) { - kill(pid, SIGTERM); int status = 1; + kill(pid, SIGTERM); wait(&status); if (WIFEXITED(status)) { if (WEXITSTATUS(status) == AWK_OK) diff --git a/test/zdtm/static/mountpoints.c b/test/zdtm/static/mountpoints.c index 00475cdc50..cf54d10960 100644 --- a/test/zdtm/static/mountpoints.c +++ b/test/zdtm/static/mountpoints.c @@ -292,8 +292,8 @@ int main(int argc, char **argv) } if (pid > 0) { - kill(pid, SIGTERM); int status = 1; + kill(pid, SIGTERM); wait(&status); if (status) return 1; diff --git a/test/zdtm/static/remap_dead_pid.c b/test/zdtm/static/remap_dead_pid.c index 261c591b79..5d4241fc6f 100644 --- a/test/zdtm/static/remap_dead_pid.c +++ b/test/zdtm/static/remap_dead_pid.c @@ -40,12 +40,12 @@ int main(int argc, char **argv) while(1) sleep(10); } else { - 
test_msg("child is %d\n", pid); - int fd, ret; char path[PATH_MAX]; pid_t result; + test_msg("child is %d\n", pid); + sprintf(path, proc_path, pid); fd = open(path, O_RDONLY); if (fd < 0) { diff --git a/test/zdtm/static/selinux01.c b/test/zdtm/static/selinux01.c index 9966455c47..cec5980e88 100644 --- a/test/zdtm/static/selinux01.c +++ b/test/zdtm/static/selinux01.c @@ -133,6 +133,7 @@ int check_sockcreate_empty() int main(int argc, char **argv) { + int sk; char ctx[1024]; test_init(argc, argv); @@ -159,7 +160,7 @@ int main(int argc, char **argv) #endif /* Open our test socket */ - int sk = socket(AF_INET, SOCK_STREAM, 0); + sk = socket(AF_INET, SOCK_STREAM, 0); memset(ctx, 0, 1024); /* Read out the socket label */ if (fgetxattr(sk, "security.selinux", ctx, 1024) == -1) { diff --git a/test/zdtm/static/sigaltstack.c b/test/zdtm/static/sigaltstack.c index d324b0d378..f36d409f5e 100644 --- a/test/zdtm/static/sigaltstack.c +++ b/test/zdtm/static/sigaltstack.c @@ -61,17 +61,17 @@ void thread_sigaction(int signo, siginfo_t *info, void *context) static void *thread_func(void *arg) { + struct sigaction sa = { + .sa_sigaction = thread_sigaction, + .sa_flags = SA_RESTART | SA_ONSTACK, + }; + sas_state[SAS_THRD_OLD] = (stack_t) { .ss_size = sizeof(stack_thread) - 8, .ss_sp = stack_thread, .ss_flags = 0, }; - struct sigaction sa = { - .sa_sigaction = thread_sigaction, - .sa_flags = SA_RESTART | SA_ONSTACK, - }; - sigemptyset(&sa.sa_mask); if (sigaction(SIGUSR2, &sa, NULL)) { @@ -103,17 +103,17 @@ int main(int argc, char *argv[]) { pthread_t thread; + struct sigaction sa = { + .sa_sigaction = leader_sigaction, + .sa_flags = SA_RESTART | SA_ONSTACK, + }; + sas_state[SAS_MAIN_OLD] = (stack_t) { .ss_size = sizeof(stack_main) - 8, .ss_sp = stack_main, .ss_flags = 0, }; - struct sigaction sa = { - .sa_sigaction = leader_sigaction, - .sa_flags = SA_RESTART | SA_ONSTACK, - }; - sigemptyset(&sa.sa_mask); test_init(argc, argv); diff --git a/test/zdtm/static/socket-tcp-syn-sent.c 
b/test/zdtm/static/socket-tcp-syn-sent.c index cf4c3bb46c..755532a8a1 100644 --- a/test/zdtm/static/socket-tcp-syn-sent.c +++ b/test/zdtm/static/socket-tcp-syn-sent.c @@ -37,7 +37,7 @@ int main(int argc, char **argv) { int fd, fd_s, sock, sk; union sockaddr_inet addr; - char cmd[4096]; + char c, cmd[4096]; test_init(argc, argv); @@ -113,7 +113,7 @@ int main(int argc, char **argv) fcntl(sock, F_SETFL, 0); - char c = 5; + c = 5; if (write(sock, &c, 1) != 1) { fail("Unable to send data"); return 1; diff --git a/test/zdtm/static/unlink_multiple_largefiles.c b/test/zdtm/static/unlink_multiple_largefiles.c index 7cf628606b..2f9248c2f7 100644 --- a/test/zdtm/static/unlink_multiple_largefiles.c +++ b/test/zdtm/static/unlink_multiple_largefiles.c @@ -30,10 +30,11 @@ void create_check_pattern(char *buf, size_t count, unsigned char seed) struct fiemap *read_fiemap(int fd) { - test_msg("Obtaining fiemap for fd %d\n", fd); struct fiemap *fiemap, *tmp; int extents_size; + test_msg("Obtaining fiemap for fd %d\n", fd); + fiemap = malloc(sizeof(struct fiemap)); if (fiemap == NULL) { pr_perror("Cannot allocate fiemap"); diff --git a/test/zdtm/transition/file_aio.c b/test/zdtm/transition/file_aio.c index a160101589..4a76c93907 100644 --- a/test/zdtm/transition/file_aio.c +++ b/test/zdtm/transition/file_aio.c @@ -17,7 +17,6 @@ const char *test_author = "Andrew Vagin "; int main(int argc, char **argv) { - test_init(argc, argv); char buf[BUF_SIZE]; int fd; struct aiocb aiocb; @@ -25,6 +24,8 @@ int main(int argc, char **argv) char tmpfname[256]="/tmp/file_aio.XXXXXX"; int ret; + test_init(argc, argv); + fd = mkstemp(tmpfname); if (fd == -1) { pr_perror("mkstemp() failed"); diff --git a/test/zdtm/transition/file_read.c b/test/zdtm/transition/file_read.c index 50dffd8c47..5d6e4dbbac 100644 --- a/test/zdtm/transition/file_read.c +++ b/test/zdtm/transition/file_read.c @@ -158,9 +158,11 @@ static void chew_some_file(int num) rv = SEEK_FAILED; goto out_exit; case 1: - rv = FILE_CORRUPTED; + { 
int fd1; char str[PATH_MAX]; + + rv = FILE_CORRUPTED; // create standard file sprintf(str, "standard_%s.%d", filename, num); fd1 = open(str, O_WRONLY | O_CREAT | O_TRUNC, 0666); @@ -168,6 +170,7 @@ static void chew_some_file(int num) pr_perror("can't write %s", str); close(fd1); goto out_exit; + } } } rv = SUCCESS; diff --git a/test/zdtm/transition/maps008.c b/test/zdtm/transition/maps008.c index 5f6eb0887d..7ed7c10a5c 100644 --- a/test/zdtm/transition/maps008.c +++ b/test/zdtm/transition/maps008.c @@ -348,6 +348,7 @@ static int proc11_func(task_waiter_t *setup_waiter) void *mem3_old = mem3; size_t mem3_size_old = mem3_size; uint32_t crc_epoch = 0; + uint8_t *proc1_mem3; pstree->proc11 = getpid(); xmunmap(mem3, MEM3_START_CUT); @@ -382,7 +383,7 @@ static int proc11_func(task_waiter_t *setup_waiter) chk_proc_mem_eq(pstree->proc11, mem3, mem3_size, pstree->proc112, mem3, mem3_size + MEM3_END_CUT); - uint8_t *proc1_mem3 = mmap_proc_mem(pstree->proc1, + proc1_mem3 = mmap_proc_mem(pstree->proc1, (unsigned long)mem3_old, mem3_size_old); check_mem_eq(mem3, mem3_size, proc1_mem3 + MEM3_START_CUT, mem3_size); xmunmap(proc1_mem3, mem3_size_old); @@ -489,16 +490,17 @@ static void sigchld_hand(int signo, siginfo_t *info, void *ucontext) int main(int argc, char **argv) { - test_init(argc, argv); - - pstree = (struct pstree *)mmap_ashmem(PAGE_SIZE); - test_sync = (struct test_sync *)mmap_ashmem(sizeof(*test_sync)); - struct sigaction sa = { .sa_sigaction = sigchld_hand, .sa_flags = SA_RESTART | SA_SIGINFO | SA_NOCLDSTOP }; sigemptyset(&sa.sa_mask); + + test_init(argc, argv); + + pstree = (struct pstree *)mmap_ashmem(PAGE_SIZE); + test_sync = (struct test_sync *)mmap_ashmem(sizeof(*test_sync)); + if (sigaction(SIGCHLD, &sa, NULL)) { pr_perror("SIGCHLD handler setup"); exit(1); From 09c918cafe2791609a2e864ff6a174620237221b Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Sat, 21 Dec 2019 18:13:06 +0000 Subject: [PATCH 176/277] style: Enforce kernel style -Wstrict-prototypes 
Include warnings that the kernel uses during compilation: -Wstrict-prototypes: enforces full declaration of functions. Previously, after declaring extern void func(), one could call func(123) without getting any compilation error. This is dangerous. The correct declaration is extern void func(void). Signed-off-by: Nicolas Viennot [Generated a commit message from the pull request] Signed-off-by: Dmitry Safonov --- Makefile | 2 +- criu/config.c | 2 +- criu/cr-check.c | 10 ++++----- criu/cr-dump.c | 4 ++-- criu/cr-restore.c | 8 +++---- criu/cr-service.c | 4 ++-- criu/crtools.c | 2 +- criu/fault-injection.c | 2 +- criu/img-remote.c | 6 ++--- criu/include/cr_options.h | 4 ++-- criu/include/img-remote.h | 14 ++++++------ criu/include/lsm.h | 4 ++-- criu/include/mount.h | 2 +- criu/include/net.h | 8 +++---- criu/include/tls.h | 2 +- criu/kerndat.c | 6 ++--- criu/lsm.c | 2 +- criu/mount.c | 6 ++--- criu/namespaces.c | 2 +- criu/net.c | 10 ++++----- criu/pstree.c | 2 +- criu/seize.c | 2 +- criu/tls.c | 6 ++--- criu/util.c | 4 ++-- soccr/test/tcp-conn.c | 2 +- soccr/test/tcp-constructor.c | 2 +- test/others/unix-callback/unix-client.c | 2 +- test/others/unix-callback/unix-server.c | 2 +- test/zdtm/Makefile.inc | 2 +- test/zdtm/lib/test.c | 6 ++--- test/zdtm/static/apparmor.c | 4 ++-- .../static/child_subreaper_and_reparent.c | 6 ++--- .../static/child_subreaper_existing_child.c | 6 ++--- test/zdtm/static/dumpable02.c | 2 +- test/zdtm/static/fdt_shared.c | 4 ++-- test/zdtm/static/file_locks00.c | 2 +- test/zdtm/static/inotify_system.c | 2 +- test/zdtm/static/maps00.c | 7 +++--- test/zdtm/static/selinux00.c | 8 +++---- test/zdtm/static/selinux01.c | 10 ++++----- test/zdtm/static/session02.c | 8 +++---- test/zdtm/static/session03.c | 10 ++++----- test/zdtm/transition/netlink00.c | 22 +++++++++---------- 43 files changed, 111 insertions(+), 110 deletions(-) diff --git a/Makefile b/Makefile index 133390f17c..00e563c113 100644 --- a/Makefile +++ b/Makefile @@ -100,7 +100,7 @@ export
PROTOUFIX DEFINES DEFINES += -D_FILE_OFFSET_BITS=64 DEFINES += -D_GNU_SOURCE -WARNINGS := -Wall -Wformat-security -Wdeclaration-after-statement +WARNINGS := -Wall -Wformat-security -Wdeclaration-after-statement -Wstrict-prototypes CFLAGS-GCOV := --coverage -fno-exceptions -fno-inline -fprofile-update=atomic export CFLAGS-GCOV diff --git a/criu/config.c b/criu/config.c index 1b921d4549..6fb6bfdff7 100644 --- a/criu/config.c +++ b/criu/config.c @@ -854,7 +854,7 @@ int parse_options(int argc, char **argv, bool *usage_error, return 1; } -int check_options() +int check_options(void) { if (opts.tcp_established_ok) pr_info("Will dump/restore TCP connections\n"); diff --git a/criu/cr-check.c b/criu/cr-check.c index 729b2dc38e..17dd29b42e 100644 --- a/criu/cr-check.c +++ b/criu/cr-check.c @@ -51,7 +51,7 @@ #include "restorer.h" #include "uffd.h" -static char *feature_name(int (*func)()); +static char *feature_name(int (*func)(void)); static int check_tty(void) { @@ -513,7 +513,7 @@ static int check_ipc(void) return -1; } -static int check_sigqueuinfo() +static int check_sigqueuinfo(void) { siginfo_t info = { .si_code = 1 }; @@ -960,7 +960,7 @@ static int clone_cb(void *_arg) { exit(0); } -static int check_clone_parent_vs_pid() +static int check_clone_parent_vs_pid(void) { struct clone_arg ca; pid_t pid; @@ -1447,7 +1447,7 @@ static int check_external_net_ns(void) struct feature_list { char *name; - int (*func)(); + int (*func)(void); }; static struct feature_list feature_list[] = { @@ -1517,7 +1517,7 @@ int check_add_feature(char *feat) return -1; } -static char *feature_name(int (*func)()) +static char *feature_name(int (*func)(void)) { struct feature_list *fl; diff --git a/criu/cr-dump.c b/criu/cr-dump.c index 6bdd28400d..0c9b6d6c7d 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -1440,7 +1440,7 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie) static int alarm_attempts = 0; -bool alarm_timeouted() { +bool alarm_timeouted(void) { return 
alarm_attempts > 0; } @@ -1457,7 +1457,7 @@ static void alarm_handler(int signo) BUG(); } -static int setup_alarm_handler() +static int setup_alarm_handler(void) { struct sigaction sa = { .sa_handler = alarm_handler, diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 2ace4d078d..516477dcdd 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -183,13 +183,13 @@ static int __restore_wait_inprogress_tasks(int participants) return 0; } -static int restore_wait_inprogress_tasks() +static int restore_wait_inprogress_tasks(void) { return __restore_wait_inprogress_tasks(0); } /* Wait all tasks except the current one */ -static int restore_wait_other_tasks() +static int restore_wait_other_tasks(void) { int participants, stage; @@ -1588,7 +1588,7 @@ static void restore_pgid(void) futex_set_and_wake(&rsti(current)->pgrp_set, 1); } -static int __legacy_mount_proc() +static int __legacy_mount_proc(void) { char proc_mountpoint[] = "/tmp/crtools-proc.XXXXXX"; int fd; @@ -1942,7 +1942,7 @@ static int catch_tasks(bool root_seized, enum trace_flags *flag) return 0; } -static int clear_breakpoints() +static int clear_breakpoints(void) { struct pstree_item *item; int ret = 0, i; diff --git a/criu/cr-service.c b/criu/cr-service.c index 549b3368b3..279016bcd3 100644 --- a/criu/cr-service.c +++ b/criu/cr-service.c @@ -1278,7 +1278,7 @@ static void reap_worker(int signo) } } -static int setup_sigchld_handler() +static int setup_sigchld_handler(void) { struct sigaction action; @@ -1295,7 +1295,7 @@ static int setup_sigchld_handler() return 0; } -static int restore_sigchld_handler() +static int restore_sigchld_handler(void) { struct sigaction action; diff --git a/criu/crtools.c b/criu/crtools.c index 1bf2d98c35..980e26a946 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -48,7 +48,7 @@ #include "sysctl.h" #include "img-remote.h" -void flush_early_log_to_stderr() __attribute__((destructor)); +void flush_early_log_to_stderr(void) __attribute__((destructor)); void 
flush_early_log_to_stderr(void) { diff --git a/criu/fault-injection.c b/criu/fault-injection.c index 4128814d52..4b06500083 100644 --- a/criu/fault-injection.c +++ b/criu/fault-injection.c @@ -3,7 +3,7 @@ enum faults fi_strategy; -int fault_injection_init() +int fault_injection_init(void) { char *val; int start; diff --git a/criu/img-remote.c b/criu/img-remote.c index 47c23faf4d..1160ba0f1d 100644 --- a/criu/img-remote.c +++ b/criu/img-remote.c @@ -432,7 +432,7 @@ static struct roperation *handle_accept_proxy_read(int cli_fd, return NULL; } -static inline void finish_local() +static inline void finish_local(void) { int ret; finished_local = true; @@ -726,7 +726,7 @@ static void handle_roperation(struct epoll_event *event, xfree(rop); } -static void check_pending() +static void check_pending(void) { struct roperation *rop = NULL; struct rimage *rimg = NULL; @@ -745,7 +745,7 @@ static void check_pending() } } -void accept_image_connections() { +void accept_image_connections(void) { int ret; epoll_fd = epoll_create(EPOLL_MAX_EVENTS); diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h index 53bd5edcbb..a7b040fbf3 100644 --- a/criu/include/cr_options.h +++ b/criu/include/cr_options.h @@ -159,7 +159,7 @@ extern struct cr_options opts; char *rpc_cfg_file; extern int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd, int state); -extern int check_options(); -extern void init_opts(); +extern int check_options(void); +extern void init_opts(void); #endif /* __CR_OPTIONS_H__ */ diff --git a/criu/include/img-remote.h b/criu/include/img-remote.h index 66d75b90ff..1d2dd615c8 100644 --- a/criu/include/img-remote.h +++ b/criu/include/img-remote.h @@ -72,7 +72,7 @@ extern int local_sk; /* True if we are running the cache/restore, false if proxy/dump. 
*/ extern bool restoring; -void accept_image_connections(); +void accept_image_connections(void); struct rimage *get_rimg_by_name(const char *snapshot_id, const char *path); int setup_UNIX_server_socket(char *path); @@ -91,8 +91,8 @@ int write_remote_image_connection(char *snapshot_id, char *path, int flags); * creates a new connection with a special control name. The receiver side uses * it to ack that no more files are coming. */ -int finish_remote_dump(); -int finish_remote_restore(); +int finish_remote_dump(void); +int finish_remote_restore(void); /* Starts an image proxy daemon (dump side). It receives image files through * socket connections and forwards them to the image cache (restore side). @@ -123,14 +123,14 @@ int skip_remote_bytes(int fd, unsigned long len); void init_snapshot_id(char *ns); /* Returns the current snapshot_id. */ -char *get_curr_snapshot_id(); +char *get_curr_snapshot_id(void); /* Returns the snapshot_id index representing the current snapshot_id. This * index represents the hierarchy position. For example: images tagged with * the snapshot_id with index 1 are more recent than the images tagged with * the snapshot_id with index 0. */ -int get_curr_snapshot_id_idx(); +int get_curr_snapshot_id_idx(void); /* Returns the snapshot_id associated with the snapshot_id index. */ char *get_snapshot_id_from_idx(int idx); @@ -138,9 +138,9 @@ char *get_snapshot_id_from_idx(int idx); /* Pushes the current snapshot_id into the snapshot_id hierarchy (into the Image * Proxy and Image Cache). */ -int push_snapshot_id(); +int push_snapshot_id(void); /* Returns the snapshot id index that precedes the current snapshot_id. 
*/ -int get_curr_parent_snapshot_id_idx(); +int get_curr_parent_snapshot_id_idx(void); #endif diff --git a/criu/include/lsm.h b/criu/include/lsm.h index 3b82712829..a41915a4c5 100644 --- a/criu/include/lsm.h +++ b/criu/include/lsm.h @@ -39,7 +39,7 @@ extern int lsm_check_opts(void); #ifdef CONFIG_HAS_SELINUX int dump_xattr_security_selinux(int fd, FdinfoEntry *e); int run_setsockcreatecon(FdinfoEntry *e); -int reset_setsockcreatecon(); +int reset_setsockcreatecon(void); #else static inline int dump_xattr_security_selinux(int fd, FdinfoEntry *e) { return 0; @@ -47,7 +47,7 @@ static inline int dump_xattr_security_selinux(int fd, FdinfoEntry *e) { static inline int run_setsockcreatecon(FdinfoEntry *e) { return 0; } -static inline int reset_setsockcreatecon() { +static inline int reset_setsockcreatecon(void) { return 0; } #endif diff --git a/criu/include/mount.h b/criu/include/mount.h index d9b375f5d8..8bf19b2666 100644 --- a/criu/include/mount.h +++ b/criu/include/mount.h @@ -96,7 +96,7 @@ extern int collect_binfmt_misc(void); static inline int collect_binfmt_misc(void) { return 0; } #endif -extern struct mount_info *mnt_entry_alloc(); +extern struct mount_info *mnt_entry_alloc(void); extern void mnt_entry_free(struct mount_info *mi); extern int __mntns_get_root_fd(pid_t pid); diff --git a/criu/include/net.h b/criu/include/net.h index 9976f6eb06..0a556f3da2 100644 --- a/criu/include/net.h +++ b/criu/include/net.h @@ -31,7 +31,7 @@ extern int collect_net_namespaces(bool for_dump); extern int network_lock(void); extern void network_unlock(void); -extern int network_lock_internal(); +extern int network_lock_internal(void); extern struct ns_desc net_ns_desc; @@ -47,11 +47,11 @@ extern int move_veth_to_bridge(void); extern int kerndat_link_nsid(void); extern int net_get_nsid(int rtsk, int fd, int *nsid); -extern struct ns_id *net_get_root_ns(); +extern struct ns_id *net_get_root_ns(void); extern int kerndat_nsid(void); extern void check_has_netns_ioc(int fd, bool 
*kdat_val, const char *name); extern int net_set_ext(struct ns_id *ns); -extern struct ns_id *get_root_netns(); -extern int read_net_ns_img(); +extern struct ns_id *get_root_netns(void); +extern int read_net_ns_img(void); #endif /* __CR_NET_H__ */ diff --git a/criu/include/tls.h b/criu/include/tls.h index aa25178876..b48e4b4808 100644 --- a/criu/include/tls.h +++ b/criu/include/tls.h @@ -4,7 +4,7 @@ # ifdef CONFIG_GNUTLS int tls_x509_init(int sockfd, bool is_server); -void tls_terminate_session(); +void tls_terminate_session(void); ssize_t tls_send(const void *buf, size_t len, int flags); ssize_t tls_recv(void *buf, size_t len, int flags); diff --git a/criu/kerndat.c b/criu/kerndat.c index b0dd831356..d1afde71d3 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -364,7 +364,7 @@ static int kerndat_get_dirty_track(void) } /* The page frame number (PFN) is constant for the zero page */ -static int init_zero_page_pfn() +static int init_zero_page_pfn(void) { void *addr; int ret = 0; @@ -429,7 +429,7 @@ static int get_task_size(void) return 0; } -static int kerndat_fdinfo_has_lock() +static int kerndat_fdinfo_has_lock(void) { int fd, pfd = -1, exit_code = -1, len; char buf[PAGE_SIZE]; @@ -464,7 +464,7 @@ static int kerndat_fdinfo_has_lock() return exit_code; } -static int get_ipv6() +static int get_ipv6(void) { if (access("/proc/sys/net/ipv6", F_OK) < 0) { if (errno == ENOENT) { diff --git a/criu/lsm.c b/criu/lsm.c index 9d7e55c11b..060f102592 100644 --- a/criu/lsm.c +++ b/criu/lsm.c @@ -133,7 +133,7 @@ static int selinux_get_sockcreate_label(pid_t pid, char **output) return 0; } -int reset_setsockcreatecon() +int reset_setsockcreatecon(void) { /* Currently this only works for SELinux. 
*/ if (kdat.lsm != LSMTYPE__SELINUX) diff --git a/criu/mount.c b/criu/mount.c index 24a8516c64..180f2a62dc 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -2140,7 +2140,7 @@ static int restore_ext_mount(struct mount_info *mi) static char mnt_clean_path[] = "/tmp/cr-tmpfs.XXXXXX"; -static int mount_clean_path() +static int mount_clean_path(void) { /* * To make a bind mount, we need to have access to a source directory, @@ -2167,7 +2167,7 @@ static int mount_clean_path() return 0; } -static int umount_clean_path() +static int umount_clean_path(void) { if (umount2(mnt_clean_path, MNT_DETACH)) { pr_perror("Unable to umount %s", mnt_clean_path); @@ -2659,7 +2659,7 @@ static int find_remap_mounts(struct mount_info *root) } /* Move remapped mounts to places where they have to be */ -static int fixup_remap_mounts() +static int fixup_remap_mounts(void) { struct mnt_remap_entry *r; diff --git a/criu/namespaces.c b/criu/namespaces.c index 57f6bdfef4..21266df7c8 100644 --- a/criu/namespaces.c +++ b/criu/namespaces.c @@ -976,7 +976,7 @@ int dump_user_ns(pid_t pid, int ns_id) return exit_code; } -void free_userns_maps() +void free_userns_maps(void) { if (userns_entry.n_uid_map > 0) { xfree(userns_entry.uid_map[0]); diff --git a/criu/net.c b/criu/net.c index 8275fc6d3f..94c5a156fd 100644 --- a/criu/net.c +++ b/criu/net.c @@ -1765,7 +1765,7 @@ static int __restore_links(struct ns_id *nsid, int *nrlinks, int *nrcreated) return 0; } -static int restore_links() +static int restore_links(void) { int nrcreated, nrlinks; struct ns_id *nsid; @@ -2108,7 +2108,7 @@ static inline int restore_rule(int pid) * iptables-restore is executed from a target userns and it may have not enough * rights to open /run/xtables.lock. Here we try to workaround this problem. 
*/ -static int prepare_xtable_lock() +static int prepare_xtable_lock(void) { int fd; @@ -2729,7 +2729,7 @@ static int iptables_restore(bool ipv6, char *buf, int size) return ret; } -int network_lock_internal() +int network_lock_internal(void) { char conf[] = "*filter\n" ":CRIU - [0:0]\n" @@ -2760,7 +2760,7 @@ int network_lock_internal() return ret; } -static int network_unlock_internal() +static int network_unlock_internal(void) { char conf[] = "*filter\n" ":CRIU - [0:0]\n" @@ -3313,7 +3313,7 @@ static int check_link_nsid(int rtsk, void *args) return do_rtnl_req(rtsk, &req, sizeof(req), check_one_link_nsid, NULL, NULL, args); } -int kerndat_link_nsid() +int kerndat_link_nsid(void) { int status; pid_t pid; diff --git a/criu/pstree.c b/criu/pstree.c index 92b4167aae..19cf5ad381 100644 --- a/criu/pstree.c +++ b/criu/pstree.c @@ -608,7 +608,7 @@ static int read_pstree_image(pid_t *pid_max) } #define RESERVED_PIDS 300 -static int get_free_pid() +static int get_free_pid(void) { static struct pid *prev, *next; diff --git a/criu/seize.c b/criu/seize.c index e1e6b81956..fd314666f0 100644 --- a/criu/seize.c +++ b/criu/seize.c @@ -194,7 +194,7 @@ static int seize_cgroup_tree(char *root_path, const char *state) * A freezer cgroup can contain tasks which will not be dumped * and we need to wait them, because the are interrupted them by ptrace. 
*/ -static int freezer_wait_processes() +static int freezer_wait_processes(void) { int i; diff --git a/criu/tls.c b/criu/tls.c index db9cc4f5a7..f7b94dee8c 100644 --- a/criu/tls.c +++ b/criu/tls.c @@ -31,7 +31,7 @@ static gnutls_certificate_credentials_t x509_cred; static int tls_sk = -1; static int tls_sk_flags = 0; -void tls_terminate_session() +void tls_terminate_session(void) { int ret; @@ -227,7 +227,7 @@ static int tls_x509_verify_peer_cert(void) return 0; } -static int tls_handshake() +static int tls_handshake(void) { int ret = -1; while (ret != GNUTLS_E_SUCCESS) { @@ -241,7 +241,7 @@ static int tls_handshake() return 0; } -static int tls_x509_setup_creds() +static int tls_x509_setup_creds(void) { int ret; char *cacert = CRIU_CACERT; diff --git a/criu/util.c b/criu/util.c index 8cd9f38dc8..f85c93a0cc 100644 --- a/criu/util.c +++ b/criu/util.c @@ -325,7 +325,7 @@ int close_pid_proc(void) return 0; } -void close_proc() +void close_proc(void) { close_pid_proc(); close_service_fd(PROC_FD_OFF); @@ -712,7 +712,7 @@ int cr_daemon(int nochdir, int noclose, int close_fd) return 0; } -int is_root_user() +int is_root_user(void) { if (geteuid() != 0) { pr_err("You need to be root to run this command\n"); diff --git a/soccr/test/tcp-conn.c b/soccr/test/tcp-conn.c index 1a1a5bb395..e31f58e7ec 100644 --- a/soccr/test/tcp-conn.c +++ b/soccr/test/tcp-conn.c @@ -23,7 +23,7 @@ static void pr_printf(unsigned int level, const char *fmt, ...) 
va_end(args); } -int main() +int main(void) { union libsoccr_addr addr, dst; int srv, sock, clnt, rst; diff --git a/soccr/test/tcp-constructor.c b/soccr/test/tcp-constructor.c index 89f2010001..973dbf10c8 100644 --- a/soccr/test/tcp-constructor.c +++ b/soccr/test/tcp-constructor.c @@ -20,7 +20,7 @@ struct tcp { uint16_t wscale; }; -static void usage() +static void usage(void) { printf( "Usage: --addr ADDR -port PORT --seq SEQ --next --addr ADDR -port PORT --seq SEQ -- CMD ...\n" diff --git a/test/others/unix-callback/unix-client.c b/test/others/unix-callback/unix-client.c index 69808b53cf..676c4adbc8 100644 --- a/test/others/unix-callback/unix-client.c +++ b/test/others/unix-callback/unix-client.c @@ -86,7 +86,7 @@ static int check_sock(int i) return 0; } -int main() +int main(void) { int i, fd; sigset_t set; diff --git a/test/others/unix-callback/unix-server.c b/test/others/unix-callback/unix-server.c index 8f32f53dd9..47bebd05d5 100644 --- a/test/others/unix-callback/unix-server.c +++ b/test/others/unix-callback/unix-server.c @@ -19,7 +19,7 @@ struct ticket *tickets; #define SK_NAME "/tmp/criu.unix.callback.test" -int main() +int main(void) { int sk, ret, id; char buf[4096]; diff --git a/test/zdtm/Makefile.inc b/test/zdtm/Makefile.inc index 6958d128e3..43763321f9 100644 --- a/test/zdtm/Makefile.inc +++ b/test/zdtm/Makefile.inc @@ -38,7 +38,7 @@ ifeq ($(origin CC), default) CC := $(CROSS_COMPILE)$(HOSTCC) endif CFLAGS += -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0 -CFLAGS += -Wdeclaration-after-statement +CFLAGS += -Wdeclaration-after-statement -Wstrict-prototypes CFLAGS += $(USERCFLAGS) CFLAGS += -D_GNU_SOURCE CPPFLAGS += -iquote $(LIBDIR)/arch/$(ARCH)/include diff --git a/test/zdtm/lib/test.c b/test/zdtm/lib/test.c index a1bdfc1b4f..630476de0e 100644 --- a/test/zdtm/lib/test.c +++ b/test/zdtm/lib/test.c @@ -71,7 +71,7 @@ static void test_fini(void) unlinkat(cwd, pidfile, 0); } -static void setup_outfile() +static void setup_outfile(void) { if 
(!access(outfile, F_OK) || errno != ENOENT) { fprintf(stderr, "Output file %s appears to exist, aborting\n", @@ -93,7 +93,7 @@ static void setup_outfile() exit(1); } -static void redir_stdfds() +static void redir_stdfds(void) { int nullfd; @@ -346,7 +346,7 @@ void test_init(int argc, char **argv) srand48(time(NULL)); /* just in case we need it */ } -void test_daemon() +void test_daemon(void) { futex_set_and_wake(&test_shared_state->stage, TEST_RUNNING_STAGE); } diff --git a/test/zdtm/static/apparmor.c b/test/zdtm/static/apparmor.c index 15930c7618..b3a4d75498 100644 --- a/test/zdtm/static/apparmor.c +++ b/test/zdtm/static/apparmor.c @@ -15,7 +15,7 @@ const char *test_author = "Tycho Andersen "; #define PROFILE "criu_test" -int setprofile() +int setprofile(void) { char profile[1024]; int fd, len; @@ -45,7 +45,7 @@ int setprofile() return 0; } -int checkprofile() +int checkprofile(void) { FILE *f; char path[PATH_MAX], profile[1024]; diff --git a/test/zdtm/static/child_subreaper_and_reparent.c b/test/zdtm/static/child_subreaper_and_reparent.c index 57943a67b7..e3955d3d94 100644 --- a/test/zdtm/static/child_subreaper_and_reparent.c +++ b/test/zdtm/static/child_subreaper_and_reparent.c @@ -25,7 +25,7 @@ struct shared { int parent_after_cr; } *sh; -int orphan() +int orphan(void) { /* * Wait until reparented to the pidns init. 
(By waiting @@ -45,7 +45,7 @@ int orphan() return 0; } -int helper() +int helper(void) { int pid; @@ -59,7 +59,7 @@ int helper() return 0; } -int subreaper() +int subreaper(void) { int pid, ret, status; diff --git a/test/zdtm/static/child_subreaper_existing_child.c b/test/zdtm/static/child_subreaper_existing_child.c index 28e9dbb8ae..8291aba087 100644 --- a/test/zdtm/static/child_subreaper_existing_child.c +++ b/test/zdtm/static/child_subreaper_existing_child.c @@ -24,7 +24,7 @@ struct shared { } *sh; -int orphan() +int orphan(void) { /* Return the control back to MAIN worker to do C/R */ futex_set_and_wake(&sh->fstate, TEST_CRIU); @@ -36,7 +36,7 @@ int orphan() return 0; } -int helper() +int helper(void) { int pid; @@ -52,7 +52,7 @@ int helper() return 0; } -int subreaper() +int subreaper(void) { int pid, ret, status; diff --git a/test/zdtm/static/dumpable02.c b/test/zdtm/static/dumpable02.c index 024371bd88..7e2eee2d1e 100644 --- a/test/zdtm/static/dumpable02.c +++ b/test/zdtm/static/dumpable02.c @@ -13,7 +13,7 @@ const char *test_doc = "Check dumpable flag handling (non-dumpable case)"; const char *test_author = "Filipe Brandenburger "; -int dumpable_server() { +int dumpable_server(void) { char buf[256]; int ret; diff --git a/test/zdtm/static/fdt_shared.c b/test/zdtm/static/fdt_shared.c index 2111356f53..a84444af5e 100644 --- a/test/zdtm/static/fdt_shared.c +++ b/test/zdtm/static/fdt_shared.c @@ -22,7 +22,7 @@ TEST_OPTION(filename, string, "file name", 1); #define CHILDREN 4 static int fork_pfd[2]; -static void forked() +static void forked(void) { char c = 0; @@ -32,7 +32,7 @@ static void forked() } } -static void wait_children() +static void wait_children(void) { int i; char c; diff --git a/test/zdtm/static/file_locks00.c b/test/zdtm/static/file_locks00.c index 59e19cfe1d..fa98a31b3d 100644 --- a/test/zdtm/static/file_locks00.c +++ b/test/zdtm/static/file_locks00.c @@ -101,7 +101,7 @@ static int check_write_lock(int fd, int whence, off_t offset, off_t len) 
return -1; } -static int check_file_locks() +static int check_file_locks(void) { int fd_0, fd_1; int ret0, ret1; diff --git a/test/zdtm/static/inotify_system.c b/test/zdtm/static/inotify_system.c index 59f47c41c8..3e6b2ad48b 100644 --- a/test/zdtm/static/inotify_system.c +++ b/test/zdtm/static/inotify_system.c @@ -68,7 +68,7 @@ typedef struct { int dir; } desc; -void do_wait() { +void do_wait(void) { test_daemon(); test_waitsig(); } diff --git a/test/zdtm/static/maps00.c b/test/zdtm/static/maps00.c index a6c68cd25b..f2da9b9756 100644 --- a/test/zdtm/static/maps00.c +++ b/test/zdtm/static/maps00.c @@ -123,7 +123,7 @@ static void segfault(int signo) * after test func should be placed check map, because size of test_func * is calculated as (check_map-test_func) */ -int test_func() +int test_func(void) { return 1; } @@ -176,8 +176,9 @@ static int check_map(struct map *map) memcpy(map->ptr,test_func, getpagesize()); } else { if (!(map->flag & MAP_ANONYMOUS)) { + uint8_t funlen = (uint8_t *)check_map - (uint8_t *)test_func; lseek(map->fd,0,SEEK_SET); - if (write(map->fd,test_func,check_map - test_func)fd,test_func,funlen)filename); return -1; } @@ -185,7 +186,7 @@ static int check_map(struct map *map) } if (!(map->flag & MAP_ANONYMOUS) || map->prot & PROT_WRITE) /* Function body has been copied into the mapping */ - ((int (*)())map->ptr)(); /* perform exec access */ + ((int (*)(void))map->ptr)(); /* perform exec access */ else /* No way to copy function body into mapping, * clear exec bit from effective protection diff --git a/test/zdtm/static/selinux00.c b/test/zdtm/static/selinux00.c index db8420eacb..b5b3e3cc00 100644 --- a/test/zdtm/static/selinux00.c +++ b/test/zdtm/static/selinux00.c @@ -26,14 +26,14 @@ const char *test_author = "Adrian Reber "; */ char state; -int check_for_selinux() +int check_for_selinux(void) { if (access("/sys/fs/selinux", F_OK) == 0) return 0; return 1; } -int setprofile() +int setprofile(void) { int fd, len; @@ -54,7 +54,7 @@ int 
setprofile() return 0; } -int checkprofile() +int checkprofile(void) { int fd; char context[1024]; @@ -83,7 +83,7 @@ int checkprofile() return 0; } -int check_sockcreate() +int check_sockcreate(void) { char *output = NULL; FILE *f = fopen("/proc/self/attr/sockcreate", "r"); diff --git a/test/zdtm/static/selinux01.c b/test/zdtm/static/selinux01.c index cec5980e88..cbf145d2a0 100644 --- a/test/zdtm/static/selinux01.c +++ b/test/zdtm/static/selinux01.c @@ -28,14 +28,14 @@ const char *test_author = "Adrian Reber "; */ char state; -int check_for_selinux() +int check_for_selinux(void) { if (access("/sys/fs/selinux", F_OK) == 0) return 0; return 1; } -int setprofile() +int setprofile(void) { int fd, len; @@ -56,7 +56,7 @@ int setprofile() return 0; } -int set_sockcreate() +int set_sockcreate(void) { int fd, len; @@ -77,7 +77,7 @@ int set_sockcreate() return 0; } -int check_sockcreate() +int check_sockcreate(void) { int fd; char context[1024]; @@ -106,7 +106,7 @@ int check_sockcreate() return 0; } -int check_sockcreate_empty() +int check_sockcreate_empty(void) { char *output = NULL; FILE *f = fopen("/proc/self/attr/sockcreate", "r"); diff --git a/test/zdtm/static/session02.c b/test/zdtm/static/session02.c index 37f245d2e8..f5c81df161 100644 --- a/test/zdtm/static/session02.c +++ b/test/zdtm/static/session02.c @@ -25,7 +25,7 @@ struct process *processes; int nr_processes = 20; int current = 0; -static void cleanup() +static void cleanup(void) { int i; @@ -55,9 +55,9 @@ struct command int arg2; }; -static void handle_command(); +static void handle_command(void); -static void mainloop() +static void mainloop(void) { while (1) handle_command(); @@ -100,7 +100,7 @@ static int make_child(int id, int flags) return cid; } -static void handle_command() +static void handle_command(void) { int sk = processes[current].sks[0], ret, status = 0; struct command cmd; diff --git a/test/zdtm/static/session03.c b/test/zdtm/static/session03.c index 2b3c46c326..8ca16e4102 100644 --- 
a/test/zdtm/static/session03.c +++ b/test/zdtm/static/session03.c @@ -36,7 +36,7 @@ static void sigchld_handler(int signal, siginfo_t *siginfo, void *data) waitpid(pid, NULL, WNOHANG); } -static void cleanup() +static void cleanup(void) { int i, ret; @@ -72,7 +72,7 @@ enum commands int cmd_weght[TEST_MAX] = {10, 3, 1, 10, 7}; int sum_weight = 0; -static int get_rnd_op() +static int get_rnd_op(void) { int i, m; if (sum_weight == 0) { @@ -97,9 +97,9 @@ struct command int arg2; }; -static void handle_command(); +static void handle_command(void); -static void mainloop() +static void mainloop(void) { while (1) handle_command(); @@ -142,7 +142,7 @@ static int make_child(int id, int flags) return cid; } -static void handle_command() +static void handle_command(void) { int sk = processes[current].sks[0], ret, status = 0; struct command cmd; diff --git a/test/zdtm/transition/netlink00.c b/test/zdtm/transition/netlink00.c index c9b2303e81..3504a48a12 100644 --- a/test/zdtm/transition/netlink00.c +++ b/test/zdtm/transition/netlink00.c @@ -56,12 +56,12 @@ struct rtmsg *rtp; int rtl; struct rtattr *rtap; -int send_request(); -int recv_reply(); -int form_request_add(); -int form_request_del(); -int read_reply(); -typedef int (*cmd_t)(); +int send_request(void); +int recv_reply(void); +int form_request_add(void); +int form_request_del(void); +int read_reply(void); +typedef int (*cmd_t)(void); #define CMD_NUM 2 cmd_t cmd[CMD_NUM]={form_request_add, form_request_del}; @@ -120,7 +120,7 @@ int main(int argc, char *argv[]) return 0; } -int send_request() +int send_request(void) { // create the remote address // to communicate @@ -145,7 +145,7 @@ int send_request() } return 0; } -int recv_reply() +int recv_reply(void) { char *p; // initialize the socket read buffer @@ -191,7 +191,7 @@ int recv_reply() return 0; } -int read_reply() +int read_reply(void) { //string to hold content of the route // table (i.e. 
one entry) @@ -250,7 +250,7 @@ int read_reply() #define NLMSG_TAIL(nmsg) \ ((struct rtattr *) (((void *) (nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len))) -int form_request_del() +int form_request_del(void) { bzero(&req, sizeof(req)); req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); @@ -272,7 +272,7 @@ int form_request_del() return 0; } -int form_request_add() +int form_request_add(void) { int ifcn = 1; //interface number From faf310c3db26d3bf0b15b5402b9b9e7592416ce1 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Mon, 11 Nov 2019 19:07:52 +0300 Subject: [PATCH 177/277] net: add nftables c/r Starting with CentOS 8, nft is used instead of iptables. But CRIU has never supported nft rules, so after c/r all rules are flushed. Co-developed-by: Pavel Tikhomirov Signed-off-by: Pavel Tikhomirov Signed-off-by: Alexander Mikhalitsyn Signed-off-by: Alexander Mikhalitsyn Signed-off-by: Dmitry Safonov --- Makefile.config | 17 ++++ criu/image-desc.c | 1 + criu/include/image-desc.h | 1 + criu/include/magic.h | 1 + criu/net.c | 115 +++++++++++++++++++++++++++ scripts/build/Dockerfile.alpine | 1 + scripts/build/Dockerfile.fedora.tmpl | 2 + scripts/feature-tests.mak | 23 ++++++ 8 files changed, 161 insertions(+) diff --git a/Makefile.config b/Makefile.config index 81aae24f81..161365960e 100644 --- a/Makefile.config +++ b/Makefile.config @@ -23,6 +23,23 @@ else $(info Note: Building without GnuTLS support) endif +ifeq ($(call pkg-config-check,libnftables),y) + LIB_NFTABLES := $(shell pkg-config --libs libnftables) + ifeq ($(call try-cc,$(FEATURE_TEST_NFTABLES_LIB_API_0),$(LIB_NFTABLES)),true) + LIBS_FEATURES += $(LIB_NFTABLES) + FEATURE_DEFINES += -DCONFIG_HAS_NFTABLES_LIB_API_0 + else ifeq ($(call try-cc,$(FEATURE_TEST_NFTABLES_LIB_API_1),$(LIB_NFTABLES)),true) + LIBS_FEATURES += $(LIB_NFTABLES) + FEATURE_DEFINES += -DCONFIG_HAS_NFTABLES_LIB_API_1 + else + $(warning Warn: you have libnftables installed but it has incompatible API) + $(warning Warn: Building without nftables
support) + endif +else + $(warning Warn: you have no libnftables installed) + $(warning Warn: Building without nftables support) +endif + export LIBS += $(LIBS_FEATURES) CONFIG_FILE = .config diff --git a/criu/image-desc.c b/criu/image-desc.c index 81cd074840..ae5d817fea 100644 --- a/criu/image-desc.c +++ b/criu/image-desc.c @@ -76,6 +76,7 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = { FD_ENTRY_F(RULE, "rule-%u", O_NOBUF), FD_ENTRY_F(IPTABLES, "iptables-%u", O_NOBUF), FD_ENTRY_F(IP6TABLES, "ip6tables-%u", O_NOBUF), + FD_ENTRY_F(NFTABLES, "nftables-%u", O_NOBUF), FD_ENTRY_F(TMPFS_IMG, "tmpfs-%u.tar.gz", O_NOBUF), FD_ENTRY_F(TMPFS_DEV, "tmpfs-dev-%u.tar.gz", O_NOBUF), FD_ENTRY_F(AUTOFS, "autofs-%u", O_NOBUF), diff --git a/criu/include/image-desc.h b/criu/include/image-desc.h index fea80a719b..6db8bf94f5 100644 --- a/criu/include/image-desc.h +++ b/criu/include/image-desc.h @@ -42,6 +42,7 @@ enum { CR_FD_RULE, CR_FD_IPTABLES, CR_FD_IP6TABLES, + CR_FD_NFTABLES, CR_FD_NETNS, CR_FD_NETNF_CT, CR_FD_NETNF_EXP, diff --git a/criu/include/magic.h b/criu/include/magic.h index 05101f436c..1a583f4ed7 100644 --- a/criu/include/magic.h +++ b/criu/include/magic.h @@ -103,6 +103,7 @@ #define TMPFS_DEV_MAGIC RAW_IMAGE_MAGIC #define IPTABLES_MAGIC RAW_IMAGE_MAGIC #define IP6TABLES_MAGIC RAW_IMAGE_MAGIC +#define NFTABLES_MAGIC RAW_IMAGE_MAGIC #define NETNF_CT_MAGIC RAW_IMAGE_MAGIC #define NETNF_EXP_MAGIC RAW_IMAGE_MAGIC diff --git a/criu/net.c b/criu/net.c index 94c5a156fd..86fba2ddc5 100644 --- a/criu/net.c +++ b/criu/net.c @@ -17,6 +17,10 @@ #include #include +#if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1) +#include +#endif + #ifdef CONFIG_HAS_SELINUX #include #endif @@ -1897,6 +1901,55 @@ static inline int dump_iptables(struct cr_imgset *fds) return 0; } +#if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1) +static inline int dump_nftables(struct cr_imgset *fds) +{ + int ret = -1; + struct cr_img *img; + 
int img_fd; + FILE *fp; + struct nft_ctx *nft; + + nft = nft_ctx_new(NFT_CTX_DEFAULT); + if (!nft) + return -1; + + img = img_from_set(fds, CR_FD_NFTABLES); + img_fd = dup(img_raw_fd(img)); + if (img_fd < 0) { + pr_perror("dup() failed"); + goto nft_ctx_free_out; + } + + fp = fdopen(img_fd, "w"); + if (!fp) { + pr_perror("fdopen() failed"); + close(img_fd); + goto nft_ctx_free_out; + } + + nft_ctx_set_output(nft, fp); +#define DUMP_NFTABLES_CMD "list ruleset" +#if defined(CONFIG_HAS_NFTABLES_LIB_API_0) + if (nft_run_cmd_from_buffer(nft, DUMP_NFTABLES_CMD, strlen(DUMP_NFTABLES_CMD))) +#elif defined(CONFIG_HAS_NFTABLES_LIB_API_1) + if (nft_run_cmd_from_buffer(nft, DUMP_NFTABLES_CMD)) +#else + BUILD_BUG_ON(1); +#endif + goto fp_close_out; + + ret = 0; + +fp_close_out: + fclose(fp); +nft_ctx_free_out: + nft_ctx_free(nft); + + return ret; +} +#endif + static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds) { void *buf, *o_buf; @@ -2178,6 +2231,60 @@ static inline int restore_iptables(int pid) return ret; } +#if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1) +static inline int restore_nftables(int pid) +{ + int ret = -1; + struct cr_img *img; + struct nft_ctx *nft; + off_t img_data_size; + char *buf; + + img = open_image(CR_FD_NFTABLES, O_RSTR, pid); + if (img == NULL) + return -1; + if (empty_image(img)) { + /* Backward compatibility */ + pr_info("Skipping nft restore, no image"); + ret = 0; + goto image_close_out; + } + + if ((img_data_size = img_raw_size(img)) < 0) + goto image_close_out; + + if (read_img_str(img, &buf, img_data_size) < 0) + goto image_close_out; + + nft = nft_ctx_new(NFT_CTX_DEFAULT); + if (!nft) + goto buf_free_out; + + if (nft_ctx_buffer_output(nft) || nft_ctx_buffer_error(nft) || +#if defined(CONFIG_HAS_NFTABLES_LIB_API_0) + nft_run_cmd_from_buffer(nft, buf, strlen(buf))) +#elif defined(CONFIG_HAS_NFTABLES_LIB_API_1) + nft_run_cmd_from_buffer(nft, buf)) +#else + { + BUILD_BUG_ON(1); + } +#endif + goto 
nft_ctx_free_out; + + ret = 0; + +nft_ctx_free_out: + nft_ctx_free(nft); +buf_free_out: + xfree(buf); +image_close_out: + close_image(img); + + return ret; +} +#endif + int read_net_ns_img(void) { struct ns_id *ns; @@ -2409,6 +2516,10 @@ int dump_net_ns(struct ns_id *ns) ret = dump_rule(fds); if (!ret) ret = dump_iptables(fds); +#if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1) + if (!ret) + ret = dump_nftables(fds); +#endif if (!ret) ret = dump_netns_conf(ns, fds); } else if (ns->type != NS_ROOT) { @@ -2502,6 +2613,10 @@ static int prepare_net_ns_second_stage(struct ns_id *ns) ret = restore_rule(nsid); if (!ret) ret = restore_iptables(nsid); +#if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1) + if (!ret) + ret = restore_nftables(nsid); +#endif } if (!ret) diff --git a/scripts/build/Dockerfile.alpine b/scripts/build/Dockerfile.alpine index a1d1d91916..29a7540580 100644 --- a/scripts/build/Dockerfile.alpine +++ b/scripts/build/Dockerfile.alpine @@ -14,6 +14,7 @@ RUN apk update && apk add \ libcap-dev \ libnet-dev \ libnl3-dev \ + nftables \ pkgconfig \ protobuf-c-dev \ protobuf-dev \ diff --git a/scripts/build/Dockerfile.fedora.tmpl b/scripts/build/Dockerfile.fedora.tmpl index 0500a8fc55..138588bce1 100644 --- a/scripts/build/Dockerfile.fedora.tmpl +++ b/scripts/build/Dockerfile.fedora.tmpl @@ -10,6 +10,8 @@ RUN dnf install -y \ gnutls-devel \ iproute \ iptables \ + nftables \ + nftables-devel \ libaio-devel \ libasan \ libcap-devel \ diff --git a/scripts/feature-tests.mak b/scripts/feature-tests.mak index 39ddfd0533..6f67c6035f 100644 --- a/scripts/feature-tests.mak +++ b/scripts/feature-tests.mak @@ -147,4 +147,27 @@ int main(void) return 0; return 0; } + +endef + +define FEATURE_TEST_NFTABLES_LIB_API_0 + +#include + +int main(int argc, char **argv) +{ + return nft_run_cmd_from_buffer(nft_ctx_new(NFT_CTX_DEFAULT), \"cmd\", strlen(\"cmd\")); +} + +endef + +define FEATURE_TEST_NFTABLES_LIB_API_1 + 
+#include + +int main(int argc, char **argv) +{ + return nft_run_cmd_from_buffer(nft_ctx_new(NFT_CTX_DEFAULT), \"cmd\"); +} + endef From ec1690d8ab34a244221e7bdcadb2e04e36570750 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Mon, 11 Nov 2019 19:20:22 +0300 Subject: [PATCH 178/277] zdtm: nft tables preservation test Signed-off-by: Alexander Mikhalitsyn Signed-off-by: Alexander Mikhalitsyn [Added test_author to zdtm test] Signed-off-by: Dmitry Safonov --- scripts/build/Dockerfile.alpine | 1 + test/zdtm/static/Makefile | 1 + test/zdtm/static/netns-nft.c | 64 ++++++++++++++++++++++++++++ test/zdtm/static/netns-nft.checkskip | 3 ++ test/zdtm/static/netns-nft.desc | 5 +++ 5 files changed, 74 insertions(+) create mode 100644 test/zdtm/static/netns-nft.c create mode 100755 test/zdtm/static/netns-nft.checkskip create mode 100644 test/zdtm/static/netns-nft.desc diff --git a/scripts/build/Dockerfile.alpine b/scripts/build/Dockerfile.alpine index 29a7540580..601a8693a0 100644 --- a/scripts/build/Dockerfile.alpine +++ b/scripts/build/Dockerfile.alpine @@ -31,6 +31,7 @@ RUN apk add \ py-pip \ ip6tables \ iptables \ + nftables \ iproute2 \ tar \ bash \ diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index ea5d3c42e6..19d93e315b 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -288,6 +288,7 @@ TST_FILE = \ file_locks07 \ file_locks08 \ netns-nf \ + netns-nft \ maps_file_prot \ socket_close_data01 \ diff --git a/test/zdtm/static/netns-nft.c b/test/zdtm/static/netns-nft.c new file mode 100644 index 0000000000..f4991afda8 --- /dev/null +++ b/test/zdtm/static/netns-nft.c @@ -0,0 +1,64 @@ +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "Check that nft rules (some) are kept"; +const char *test_author = "Alexander Mikhalitsyn "; + +char *filename; +TEST_OPTION(filename, string, "file name", 1); + +int main(int argc, char **argv) +{ + char cmd[128]; + + test_init(argc, argv); + + /* create 
nft table */ + if (system("nft add table inet netns-nft-zdtm-test")) { + pr_perror("Can't create nft table"); + return -1; + } + + /* create input chain in table */ + if (system("nft add chain inet netns-nft-zdtm-test input { type filter hook input priority 0 \\; }")) { + pr_perror("Can't create input chain in nft table"); + return -1; + } + + /* block ICMPv4 traffic */ + if (system("nft add rule inet netns-nft-zdtm-test input meta nfproto ipv4 icmp type { echo-request } reject")) { + pr_perror("Can't set input rule"); + return -1; + } + + /* save resulting nft table */ + sprintf(cmd, "nft list table inet netns-nft-zdtm-test > pre-%s", filename); + if (system(cmd)) { + pr_perror("Can't get nft table"); + return -1; + } + + test_daemon(); + test_waitsig(); + + /* get nft table */ + sprintf(cmd, "nft list table inet netns-nft-zdtm-test > post-%s", filename); + if (system(cmd)) { + fail("Can't get nft table"); + return -1; + } + + /* compare nft table before/after c/r */ + sprintf(cmd, "diff pre-%s post-%s", filename, filename); + if (system(cmd)) { + fail("nft table differ"); + return -1; + } + + pass(); + return 0; +} diff --git a/test/zdtm/static/netns-nft.checkskip b/test/zdtm/static/netns-nft.checkskip new file mode 100755 index 0000000000..270cafeb55 --- /dev/null +++ b/test/zdtm/static/netns-nft.checkskip @@ -0,0 +1,3 @@ +#!/bin/bash + +test -f /usr/sbin/nft || exit 1 diff --git a/test/zdtm/static/netns-nft.desc b/test/zdtm/static/netns-nft.desc new file mode 100644 index 0000000000..f53890a245 --- /dev/null +++ b/test/zdtm/static/netns-nft.desc @@ -0,0 +1,5 @@ +{ 'deps': [ '/bin/sh', + '/usr/sbin/nft', + '/usr/bin/diff'], + 'flags': 'suid', + 'flavor': 'ns uns'} From 6905c22cdf86218db6f5b7184f77390d15b0cc44 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Wed, 18 Dec 2019 23:32:32 +0000 Subject: [PATCH 179/277] files: fix ghost file error path Signed-off-by: Nicolas Viennot --- criu/files-reg.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) 
diff --git a/criu/files-reg.c b/criu/files-reg.c index 1b51d1088e..90a90024f2 100644 --- a/criu/files-reg.c +++ b/criu/files-reg.c @@ -885,10 +885,13 @@ static int dump_ghost_remap(char *path, const struct stat *st, gf->dev = phys_dev; gf->ino = st->st_ino; gf->id = ghost_file_ids++; - list_add_tail(&gf->list, &ghost_files); - if (dump_ghost_file(lfd, gf->id, st, phys_dev)) + if (dump_ghost_file(lfd, gf->id, st, phys_dev)) { + xfree(gf); return -1; + } + + list_add_tail(&gf->list, &ghost_files); dump_entry: rpe.orig_id = id; From e6946e2fde25c20e4b64edc66468716e54952857 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Mon, 30 Dec 2019 20:27:40 +0000 Subject: [PATCH 180/277] python: Improve decoding of file flags Signed-off-by: Nicolas Viennot --- lib/py/images/pb2dict.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/lib/py/images/pb2dict.py b/lib/py/images/pb2dict.py index daaa7297ea..6fce4be22d 100644 --- a/lib/py/images/pb2dict.py +++ b/lib/py/images/pb2dict.py @@ -105,11 +105,22 @@ def _custom_conv(field): ] rfile_flags_map = [ - ('O_WRONLY', 0o1), - ('O_RDWR', 0o2), - ('O_APPEND', 0o2000), - ('O_DIRECT', 0o40000), - ('O_LARGEFILE', 0o100000), + ('O_WRONLY', 0o00000001), + ('O_RDWR', 0o00000002), + ('O_CREAT', 0o00000100), + ('O_EXCL', 0o00000200), + ('O_NOCTTY', 0o00000400), + ('O_TRUNC', 0o00001000), + ('O_APPEND', 0o00002000), + ('O_NONBLOCK', 0o00004000), + ('O_DSYNC', 0o00010000), + ('FASYNC', 0o00020000), + ('O_DIRECT', 0o00040000), + ('O_LARGEFILE', 0o00100000), + ('O_DIRECTORY', 0o00200000), + ('O_NOFOLLOW', 0o00400000), + ('O_NOATIME', 0o01000000), + ('O_CLOEXEC', 0o02000000), ] pmap_flags_map = [ From 4470237070de57e9947774a736a0ccbebc05fbd2 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Mon, 30 Dec 2019 20:29:27 +0000 Subject: [PATCH 181/277] files: Remove O_CLOEXEC from file flags The kernel artificially adds the O_CLOEXEC flag when reading from the /proc/fdinfo/fd interface if FD_CLOEXEC is set on 
the file descriptor used to access the file. This commit removes the O_CLOEXEC flag in our file flags. To restore the proper FD_CLOEXEC value in each of the file descriptors, CRIU uses fcntl(F_GETFD) to retrieve the FD_CLOEXEC status, and restore it later with fcntl(F_SETFD). This is necessary because multiple file descriptors may point to the same open file. --- criu/files.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/criu/files.c b/criu/files.c index ffdaa459fc..e268978704 100644 --- a/criu/files.c +++ b/criu/files.c @@ -382,7 +382,13 @@ static int fill_fd_params(struct pid *owner_pid, int fd, int lfd, p->fs_type = fsbuf.f_type; p->fd = fd; p->pos = fdinfo.pos; - p->flags = fdinfo.flags; + /* + * The kernel artificially adds the O_CLOEXEC flag on the file pointer + * flags by looking at the flags on the file descriptor (see kernel + * code fs/proc/fd.c). FD_CLOEXEC is a file descriptor property, which + * is saved in fd_flags. + */ + p->flags = fdinfo.flags & ~O_CLOEXEC; p->mnt_id = fdinfo.mnt_id; p->pid = owner_pid->real; p->fd_flags = opts->flags; From 74f8c75e37a0766051e775efd7030d8392d022d0 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Mon, 30 Dec 2019 20:21:03 +0000 Subject: [PATCH 182/277] files: Add FD_CLOEXEC test --- test/zdtm/static/Makefile | 1 + test/zdtm/static/file_cloexec.c | 63 +++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 test/zdtm/static/file_cloexec.c diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index 19d93e315b..5ca05ee9ef 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -122,6 +122,7 @@ TST_NOFILE := \ groups \ pdeath_sig \ file_fown \ + file_cloexec \ proc-self \ eventfs00 \ epoll \ diff --git a/test/zdtm/static/file_cloexec.c b/test/zdtm/static/file_cloexec.c new file mode 100644 index 0000000000..b8eba39e54 --- /dev/null +++ b/test/zdtm/static/file_cloexec.c @@ -0,0 +1,63 @@ +#include +#include +#include +#include 
+#include +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "Check FD_CLOEXEC flag"; +const char *test_author = "Nicolas Viennot "; + +#define err(exitcode, msg, ...) ({ pr_perror(msg, ##__VA_ARGS__); exit(exitcode); }) + +static void assert_fd_flags(int fd, int mask, int value) +{ + int flags = fcntl(fd, F_GETFD); + if (flags == -1) + err(1, "Can't get fd flags"); + + if ((flags & mask) != value) { + fail("fd flags mismatch"); + exit(1); + } +} + +int main(int argc, char *argv[]) +{ + int fd1, fd2, fd3, fd4; + + test_init(argc, argv); + + fd1 = open("/", O_RDONLY | O_CLOEXEC); + if (fd1 < 0) + err(1, "Can't open()"); + + fd2 = open("/", O_RDONLY); + if (fd2 < 0) + err(1, "Can't open()"); + + fd3 = dup(fd1); + if (fd3 < 0) + err(1, "Can't dup()"); + + fd4 = fcntl(fd2, F_DUPFD_CLOEXEC, 0); + if (fd4 < 0) + err(1, "Can't dup()"); + + test_daemon(); + test_waitsig(); + + assert_fd_flags(fd1, FD_CLOEXEC, FD_CLOEXEC); + assert_fd_flags(fd2, FD_CLOEXEC, 0); + assert_fd_flags(fd3, FD_CLOEXEC, 0); + assert_fd_flags(fd4, FD_CLOEXEC, FD_CLOEXEC); + + pass(); + + return 0; +} From de7690d7a72ff05fdbbde6a88da8dacc9ca849e8 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Thu, 9 Jan 2020 10:31:41 +0000 Subject: [PATCH 183/277] travis: switch alpine to python3 Now that Python 2 has officially reached its end of life also switch the Alpine based test to Python 3. Signed-off-by: Adrian Reber --- scripts/build/Dockerfile.alpine | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/scripts/build/Dockerfile.alpine b/scripts/build/Dockerfile.alpine index 601a8693a0..5785102dac 100644 --- a/scripts/build/Dockerfile.alpine +++ b/scripts/build/Dockerfile.alpine @@ -18,7 +18,7 @@ RUN apk update && apk add \ pkgconfig \ protobuf-c-dev \ protobuf-dev \ - python \ + python3 \ sudo COPY . 
/criu @@ -28,7 +28,6 @@ RUN mv .ccache /tmp && make mrproper && ccache -sz && \ date && make -j $(nproc) CC="$CC" && date && ccache -s RUN apk add \ - py-pip \ ip6tables \ iptables \ nftables \ @@ -37,10 +36,16 @@ RUN apk add \ bash \ go \ e2fsprogs \ + py-yaml \ + py3-flake8 \ asciidoctor # The rpc test cases are running as user #1000, let's add the user RUN adduser -u 1000 -D test -RUN pip install PyYAML future protobuf ipaddress junit_xml flake8 +RUN pip3 install protobuf junit_xml + +# For zdtm we need an unversioned python binary +RUN ln -s /usr/bin/python3 /usr/bin/python + RUN make -C test/zdtm From 0d75cd9338472bcfc526be6b5d8b0ea5f477aae0 Mon Sep 17 00:00:00 2001 From: Nidhi Gupta Date: Sun, 8 Dec 2019 10:14:40 +0530 Subject: [PATCH 184/277] Add Socket-based Java Functional Tests Signed-off-by: Nidhi Gupta --- scripts/build/Dockerfile.openj9-ubuntu | 1 + test/javaTests/README.md | 11 +- .../criu/java/tests/CheckpointRestore.java | 1 + .../src/org/criu/java/tests/Helper.java | 2 +- .../src/org/criu/java/tests/SocketHelper.java | 100 ++++++++ .../src/org/criu/java/tests/Sockets.java | 141 ++++++++++++ .../org/criu/java/tests/SocketsClient.java | 133 +++++++++++ .../org/criu/java/tests/SocketsConnect.java | 157 +++++++++++++ .../criu/java/tests/SocketsConnectClient.java | 130 +++++++++++ .../criu/java/tests/SocketsConnectServer.java | 151 ++++++++++++ .../src/org/criu/java/tests/SocketsData.java | 156 +++++++++++++ .../criu/java/tests/SocketsDataClient.java | 141 ++++++++++++ .../criu/java/tests/SocketsDataServer.java | 124 ++++++++++ .../org/criu/java/tests/SocketsListen.java | 153 +++++++++++++ .../criu/java/tests/SocketsListenClient.java | 136 +++++++++++ .../criu/java/tests/SocketsListenServer.java | 160 +++++++++++++ .../org/criu/java/tests/SocketsMultiple.java | 152 +++++++++++++ .../java/tests/SocketsMultipleClient.java | 174 ++++++++++++++ .../java/tests/SocketsMultipleServer.java | 215 ++++++++++++++++++ .../org/criu/java/tests/SocketsServer.java | 
142 ++++++++++++ test/javaTests/test.xml | 46 ++++ 21 files changed, 2424 insertions(+), 2 deletions(-) create mode 100644 test/javaTests/src/org/criu/java/tests/SocketHelper.java create mode 100644 test/javaTests/src/org/criu/java/tests/Sockets.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsClient.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsConnect.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsConnectClient.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsConnectServer.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsData.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsDataClient.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsDataServer.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsListen.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsListenClient.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsListenServer.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsMultiple.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsMultipleClient.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsMultipleServer.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsServer.java diff --git a/scripts/build/Dockerfile.openj9-ubuntu b/scripts/build/Dockerfile.openj9-ubuntu index 13d9080ff2..f235cc0047 100644 --- a/scripts/build/Dockerfile.openj9-ubuntu +++ b/scripts/build/Dockerfile.openj9-ubuntu @@ -18,6 +18,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends protobuf-c-comp make \ git \ pkg-config \ + iptables \ gcc \ maven diff --git a/test/javaTests/README.md b/test/javaTests/README.md index 6707416779..4315b9b124 100644 --- a/test/javaTests/README.md +++ b/test/javaTests/README.md @@ -30,7 +30,16 @@ Here we test the File-Based Java APIs by checkpointing the 
application in the fo ## Memory mapping Java APIs Here we test the Memory Mapping APIs by checkpointing the application in following scenario and verifying the contents after restore: -- Memory-mapping a file and writing its content to another file. (MemoryMappings.java) +- Memory-mapping a file and writing its content to another file. (MemoryMappings.java) + +## Socket-based Java APIs + +Here we test the Socket-based APIs by checkpointing the application in the following scenario and verifying the state after restore: +- Checkpointing the server process in the middle of data transfer. (Sockets.java) +- Checkpointing the server process after it has bound to a port but is not listening for client connections. (SocketsListen.java) +- Checkpointing the server process while it is listening for client connections, and no client has connected yet. (SocketsConnect.java) +- Checkpointing the server process when it has multiple clients in multiple states connected to it. (SocketsMultiple.java) +- Checkpointing the client process in the middle of data transfer.
(SocketsData.java) ### Prerequisites for running the tests: - Maven diff --git a/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java b/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java index b848c9938e..9d61e126f1 100644 --- a/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java +++ b/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java @@ -154,6 +154,7 @@ public void runtest(String testName, String checkpointOpt, String restoreOpt) th */ while (Helper.STATE_INIT == currentState) { currentState = mappedByteBuffer.getChar(Helper.MAPPED_INDEX); + Thread.sleep(100); } /* diff --git a/test/javaTests/src/org/criu/java/tests/Helper.java b/test/javaTests/src/org/criu/java/tests/Helper.java index fdf20bb521..9a1b333286 100644 --- a/test/javaTests/src/org/criu/java/tests/Helper.java +++ b/test/javaTests/src/org/criu/java/tests/Helper.java @@ -30,7 +30,7 @@ class Helper { * the pid to the pidFile. * * @param testName Name of the java test - * @param pid Pid of the java test process + * @param pid Pid of the java test process * @param logger * @return 0 or 1 denoting whether the function was successful or not. * @throws IOException diff --git a/test/javaTests/src/org/criu/java/tests/SocketHelper.java b/test/javaTests/src/org/criu/java/tests/SocketHelper.java new file mode 100644 index 0000000000..684125019d --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketHelper.java @@ -0,0 +1,100 @@ +package org.criu.java.tests; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.nio.MappedByteBuffer; +import java.util.logging.FileHandler; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.logging.SimpleFormatter; + +class SocketHelper { + + static char STATE_LISTEN = 'S'; + static char STATE_SUCCESS = 'Z'; + static String IP_ADDRESS = "127.0.0.1"; + + /** + * Creates a new log file, for the logger to log in. 
+ * + * @param testName Name of the server or client program + * @param parentTestName Name of the test + * @param logger + * @throws IOException + */ + static void init(String testName, String parentTestName, Logger logger) throws IOException { + FileHandler handler = new FileHandler(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/" + testName + ".log", false); + handler.setFormatter(new SimpleFormatter()); + handler.setLevel(Level.FINE); + logger.addHandler(handler); + logger.setLevel(Level.FINE); + } + + /** + * Writes pid of the process to be checkpointed in the file + * + * @param parentTestName Name of the test + * @param pid Pid of the process to be checkpointed + * @throws IOException + */ + static void writePid(String parentTestName, String pid) throws IOException { + File pidfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/" + parentTestName + Helper.PID_APPEND); + BufferedWriter pidwriter = new BufferedWriter(new FileWriter(pidfile)); + /* + * Overwriting pid to be checkpointed + */ + pidwriter.write(pid + "\n"); + pidwriter.close(); + } + + /** + * Waits for the MappedByteBuffer to change state from STATE_CHECKPOINT to STATE_RESTORE + * + * @param socketMappedBuffer MappedByteBuffer between the client, server and the controller process. 
+ * @param logger + */ + static void socketWaitForRestore(MappedByteBuffer socketMappedBuffer, Logger logger) { + while (Helper.STATE_CHECKPOINT == socketMappedBuffer.getChar(Helper.MAPPED_INDEX)) { + ; + } + if (Helper.STATE_RESTORE != socketMappedBuffer.getChar(Helper.MAPPED_INDEX)) { + logger.log(Level.SEVERE, "Server socket was not in expected restore state " + socketMappedBuffer.getChar(Helper.MAPPED_INDEX)); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } else { + logger.log(Level.INFO, "Restored!!!"); + } + } + + /** + * Puts the MappedByteBuffer to Helper.STATE_CHECKPOINT and waits for CheckpointRestore.java to change its state to Helper.STATE_RESTORE + * + * @param b MappedByteBuffer between the controller process and CheckpointRestore.java + * @param logger Logger to log the messages + * @param p1 Process object for the client process + * @param p2 Process object for the server process + */ + static void checkpointAndWait(MappedByteBuffer b, Logger logger, Process p1, Process p2) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_CHECKPOINT); + char c = b.getChar(Helper.MAPPED_INDEX); + while (Helper.STATE_CHECKPOINT == c) { + c = b.getChar(Helper.MAPPED_INDEX); + } + if (Helper.STATE_TERMINATE == c) { + logger.log(Level.SEVERE, "Error during checkpoint-restore, Test terminated"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + p1.destroy(); + p2.destroy(); + System.exit(1); + } + if (Helper.STATE_RESTORE != c) { + logger.log(Level.SEVERE, "Error: Test state is not the expected Restored state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + p1.destroy(); + p2.destroy(); + System.exit(1); + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/Sockets.java b/test/javaTests/src/org/criu/java/tests/Sockets.java new file mode 100644 index 0000000000..94cc217c4a --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/Sockets.java @@ -0,0 +1,141 @@ +package org.criu.java.tests; + +import 
java.io.File; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class Sockets { + static String TESTNAME = "Sockets"; + + /** + * Runs the client and server process, checkpoints the server process while its in the middle of data transfer + * + * @param args Not used + */ + public static void main(String[] args) { + MappedByteBuffer b = null, socketMappedBuffer = null; + FileChannel channel; + String pid; + String port = "49200"; + Logger logger = null; + try { + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + + /* + * Mapped buffer 'b' to communicate between CheckpointRestore.java and this process. + */ + File f = new File(Helper.MEMORY_MAPPED_FILE_NAME); + channel = FileChannel.open(f.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + b = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + pid = bean.getName(); + Helper.init(TESTNAME, pid, logger); + logger.log(Level.INFO, "Test init done; pid written to pid file; beginning with test"); + + if (Helper.STATE_INIT != b.getChar(Helper.MAPPED_INDEX)) { + logger.log(Level.SEVERE, "Error: Error in memory mapping, test is not in init state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Creating socketBufferFile and setting the init value of buffer"); + + /* + * Socket Mapped Buffer to communicate between server process, client process and this process. 
+ */ + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + TESTNAME + "/SocketsFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_INIT); + + logger.log(Level.INFO, "Starting server and client process"); + ProcessBuilder builder = new ProcessBuilder("java", "-cp", "target/classes", Helper.PACKAGE_NAME + "." + "SocketsServer", TESTNAME, port); + Process serverProcess = builder.start(); + logger.log(Level.INFO, "Server process started"); + builder = new ProcessBuilder("java", "-cp", "target/classes", Helper.PACKAGE_NAME + "." + "SocketsClient", TESTNAME, port); + Process clientProcess = builder.start(); + + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_INIT || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_LISTEN) { + ; + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + logger.log(Level.SEVERE, "Killing the server and client process"); + logger.log(Level.SEVERE, "Error took place in the client or server process; check their log for details"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_CHECKPOINT) { + logger.log(Level.SEVERE, "Killing the server and client process"); + logger.log(Level.SEVERE, "State is not the expected 'to be checkpointed' state"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_CHECKPOINT) { + logger.log(Level.INFO, "Going to checkpoint server process"); + SocketHelper.checkpointAndWait(b, logger, serverProcess, 
clientProcess); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_RESTORE); + logger.log(Level.INFO, "Process has been restored"); + } + /* + * Loop while test is running. + */ + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_RESTORE || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_SUCCESS) { + ; + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_FAIL && socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_PASS) { + logger.log(Level.SEVERE, "Killing the server and client process"); + logger.log(Level.SEVERE, "Received wrong message from the child process: not the expected finish message"); + logger.log(Level.SEVERE, "Check their log files for more details"); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL) { + logger.log(Level.SEVERE, "Killing the server and client process"); + logger.log(Level.SEVERE, "Error in the client or server process: check their log for details"); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + /* + * Client process puts socketMappedBuffer to Pass state if the test passed. 
+ */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_PASS) { + logger.log(Level.INFO, Helper.PASS_MESSAGE); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + } else { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + System.exit(0); + + } catch (Exception e) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + e.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occured:" + e); + logger.log(Level.FINE, writer.toString()); + } + if (b != null) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + System.exit(5); + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsClient.java b/test/javaTests/src/org/criu/java/tests/SocketsClient.java new file mode 100644 index 0000000000..1c8e7b9a18 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsClient.java @@ -0,0 +1,133 @@ +package org.criu.java.tests; + +import java.io.*; +import java.net.Socket; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsClient { + static String TESTNAME = "SocketsClient"; + + public static void main(String[] args) { + MappedByteBuffer socketMappedBuffer = null; + FileChannel channel; + Socket socket = null; + Logger logger = null; + String msg1 = "Ch@ckM@$$@Ge!1", msg2 = "cH@C!m$SG!!2", + readMssg, msg3 = "@Ft@rCPM$$g3", msg4 = "Aft@rCPM$$g4"; + String parentTestName, portArg; + int port; + + try { + parentTestName = args[0]; + portArg = args[1]; + port = Integer.parseInt(portArg); + + /* + * Socket Mapped Buffer to communicate between server process, client process and the calling parent process. 
+ */ + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/SocketsFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + SocketHelper.init(TESTNAME, parentTestName, logger); + + logger.log(Level.INFO, "Begin"); + logger.log(Level.INFO, "Parent name: " + parentTestName); + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT && socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != SocketHelper.STATE_LISTEN) { + logger.log(Level.SEVERE, "Error: Socket-buffer not in expected Init state"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + } + logger.log(Level.INFO, "Client socket sending req to server at IP: 127.0.0.1 port:" + port); + + /* + * Ensure client does not try to connect to port before server has bound itself. + */ + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_INIT) { + ; + } + /* + * Socket Buffer should be put in SocketHelper.STATE_LISTEN state by server process, just before + * it starts listening for client connections. 
+ */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != SocketHelper.STATE_LISTEN) { + logger.log(Level.SEVERE, "Error: Buffer does not contain the expected 'server bound to port and listening' state"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + /* + * Ensure server has bound to port + */ + try { + Thread.sleep(10); + } catch (InterruptedException e) { + logger.log(Level.WARNING, "InterruptedException occurred!"); + } + + socket = new Socket(SocketHelper.IP_ADDRESS, port); + + PrintStream out = new PrintStream(socket.getOutputStream()); + BufferedReader br = new BufferedReader(new InputStreamReader(socket.getInputStream())); + + logger.log(Level.INFO, "Sending message to server " + msg1); + out.println(msg1); + + readMssg = br.readLine(); + logger.log(Level.INFO, "Message received from server " + readMssg); + if (!msg2.equals(readMssg)) { + logger.log(Level.SEVERE, "Error: wrong message received; message expected " + msg2); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + } + + logger.log(Level.INFO, "Sending message to server " + msg3); + out.println(msg3); + + readMssg = br.readLine(); + logger.log(Level.INFO, "Message received from server " + readMssg); + if (!msg4.equals(readMssg)) { + logger.log(Level.SEVERE, "Error: wrong message received; message expected " + msg4); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + socket.close(); + /* + * Wait for server process to end and then check whether it ended successfully or not + * If it has finished properly the socketMappedBuffer will contain SocketHelper.STATE_SUCCESS + */ + logger.log(Level.INFO, "Waiting for server process to end...."); + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_RESTORE) { + ; + } + /* + * Check the server process has ended successfully, if it was a success put Mapped Buffer to pass state, else to failed state + */ + if 
(socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_SUCCESS) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + } else { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + logger.log(Level.INFO, "Test ends"); + + } catch (Exception exception) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + exception.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occured:" + exception); + logger.log(Level.FINE, writer.toString()); + } + + if (socketMappedBuffer != null) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsConnect.java b/test/javaTests/src/org/criu/java/tests/SocketsConnect.java new file mode 100644 index 0000000000..164c210896 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsConnect.java @@ -0,0 +1,157 @@ +package org.criu.java.tests; + +import java.io.File; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsConnect { + static String TESTNAME = "SocketsConnect"; + + /** + * Runs the client and server process, checkpoints the server when its listening for incoming client connection requests on a port but no client has connected yet + * + * @param args Not used + */ + public static void main(String[] args) { + MappedByteBuffer b = null, socketMappedBuffer = null; + FileChannel channel; + String pid; + String port = "49200"; + Logger logger = null; + try { + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." 
+ TESTNAME); + + /* + * Mapped buffer 'b' to communicate between CheckpointRestore.java and this process. + */ + File f = new File(Helper.MEMORY_MAPPED_FILE_NAME); + channel = FileChannel.open(f.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + b = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + pid = bean.getName(); + Helper.init(TESTNAME, pid, logger); + logger.log(Level.INFO, "Test init done; pid written to pid file; beginning with test"); + + if (b.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT) { + logger.log(Level.SEVERE, "Error: Error in memory mapping, test is not in init state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + /* + * Socket Mapped Buffer to communicate between server process, client process and this process. + */ + logger.log(Level.INFO, "Creating socketbufferfile and setting the init value of buffer"); + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + TESTNAME + "/SocketsConnectFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + /* + * Set socketMappedBuffer to init state. + */ + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_INIT); + logger.log(Level.INFO, "Starting server and client process"); + ProcessBuilder builder = new ProcessBuilder("java", "-cp", "target/classes", Helper.PACKAGE_NAME + "." + "SocketsConnectServer", TESTNAME, port); + Process serverProcess = builder.start(); + builder = new ProcessBuilder("java", "-cp", "target/classes", Helper.PACKAGE_NAME + "." 
+ "SocketsConnectClient", TESTNAME, port); + Process clientProcess = builder.start(); + + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_INIT) { + ; + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "Some error took place in the client or server process: check their log for details"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "Exception occured in the client or server process: check their log for details"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_CHECKPOINT) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "State is not the expected 'to be checkpointed' state"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_CHECKPOINT) { + logger.log(Level.INFO, "Going to checkpoint server process"); + try { + Thread.sleep(10); + } catch (InterruptedException e) { + logger.log(Level.WARNING, "Thread was interrupted"); + } + SocketHelper.checkpointAndWait(b, logger, serverProcess, clientProcess); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_RESTORE); + logger.log(Level.INFO, "Process has been restored!"); + } + + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_RESTORE || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_LISTEN) { + ; + } + char 
bufchar = socketMappedBuffer.getChar(Helper.MAPPED_INDEX); + if (bufchar != Helper.STATE_FAIL && bufchar != Helper.STATE_PASS && bufchar != SocketHelper.STATE_SUCCESS) { + logger.log(Level.SEVERE, "Received wrong message from the child process: not the expected finish message"); + logger.log(Level.SEVERE, "Check their log files for more details"); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + logger.log(Level.SEVERE, "Error in the client or server process: check their log for details"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_SUCCESS) { + ; + } + + /* + * Client process puts socketMappedBuffer to 'P'-Pass state if the test passed. + * Send pass message to Checkpoint-restore.java + */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_PASS) { + logger.log(Level.INFO, Helper.PASS_MESSAGE); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + } else { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + System.exit(0); + + } catch (Exception e) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + e.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occured:" + e); + logger.log(Level.FINE, writer.toString()); + } + if (b != null) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + System.exit(5); + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsConnectClient.java b/test/javaTests/src/org/criu/java/tests/SocketsConnectClient.java new file mode 100644 index 0000000000..ed1c7fab3c --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsConnectClient.java @@ -0,0 +1,130 @@ +package 
org.criu.java.tests; + +import java.io.*; +import java.net.Socket; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsConnectClient { + static String TESTNAME = "SocketsConnectClient"; + + public static void main(String[] args) { + MappedByteBuffer socketMappedBuffer = null; + FileChannel channel; + Socket socket = null; + String parentTestName, portArg; + int port; + Logger logger = null; + try { + parentTestName = args[0]; + portArg = args[1]; + port = Integer.parseInt(portArg); + + String msg1 = "Ch@ckM@$$@Ge!1", msg2 = "cH@C!m$SG!!2", + readMssg, msg3 = "@Ft@rCPM$$g3", msg4 = "Aft@rCPM$$g4"; + + /* + * Socket Mapped Buffer to communicate between server process, client process and the calling parent process. + */ + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/SocketsConnectFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." 
+ TESTNAME); + SocketHelper.init(TESTNAME, parentTestName, logger); + + logger.log(Level.INFO, "Begin"); + logger.log(Level.INFO, "Parent name: " + parentTestName); + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT && socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_CHECKPOINT && socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_RESTORE) { + logger.log(Level.SEVERE, "Error: Socket-buffer not in expected Init state"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Waiting for CR"); + /* + * Wait for Checkpoint-Restore to occur + */ + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_INIT || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_CHECKPOINT) { + ; + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_RESTORE) { + logger.log(Level.SEVERE, "Error:Buffer does not contain the expected restored state: " + socketMappedBuffer.getChar(Helper.MAPPED_INDEX)); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + logger.log(Level.INFO, "Restored"); + logger.log(Level.INFO, "Client socket sending req to server at IP: 127.0.0.1 port:" + port); + + /* + * Server should has have been listening for client connections when it was checkpointed, and it should continue to listen after restore. 
+ */ + try { + socket = new Socket(SocketHelper.IP_ADDRESS, port); + } catch (Exception e) { + logger.log(Level.SEVERE, "Exception occured when connecting to port: " + e); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + PrintStream out = new PrintStream(socket.getOutputStream()); + BufferedReader br = new BufferedReader(new InputStreamReader(socket.getInputStream())); + + logger.log(Level.INFO, "Sending message to server " + msg1); + out.println(msg1); + + readMssg = br.readLine(); + logger.log(Level.INFO, "message received from server " + readMssg); + if (!msg2.equals(readMssg)) { + logger.log(Level.SEVERE, "wrong message received; Expected " + msg2); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + logger.log(Level.INFO, "Sending message to server " + msg3); + out.println(msg3); + + readMssg = br.readLine(); + logger.log(Level.INFO, "message received from server " + readMssg); + if (!msg4.equals(readMssg)) { + logger.log(Level.SEVERE, "wrong message received; Expected " + msg4); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + socket.close(); + + /* + * Wait for server process to end. 
+ */ + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_RESTORE) { + ; + } + /* + * Check the server process has ended successfully, if it was a success put Mapped Buffer to pass state, else to failed state + */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_SUCCESS) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + } else { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + } catch (Exception exception) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + exception.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occured:" + exception); + logger.log(Level.FINE, writer.toString()); + } + + if (socketMappedBuffer != null) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsConnectServer.java b/test/javaTests/src/org/criu/java/tests/SocketsConnectServer.java new file mode 100644 index 0000000000..1e4cf3aeb1 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsConnectServer.java @@ -0,0 +1,151 @@ +package org.criu.java.tests; + +import java.io.*; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.net.ServerSocket; +import java.net.Socket; +import java.net.SocketException; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsConnectServer { + static String TESTNAME = "SocketsConnectServer"; + + public static void main(String[] args) { + MappedByteBuffer socketMappedBuffer = null; + FileChannel channel; + Socket socket = null; + String msg1 = "Ch@ckM@$$@Ge!1", msg2 = "cH@C!m$SG!!2", msg3 = "@Ft@rCPM$$g3", + msg4 = 
"Aft@rCPM$$g4", readMssg; + Logger logger = null; + String parentTestName, portArg; + int port; + try { + parentTestName = args[0]; + portArg = args[1]; + port = Integer.parseInt(portArg); + + /* + * Socket Mapped Buffer to communicate between server process, client process and the calling parent process. + */ + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/SocketsConnectFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + SocketHelper.init(TESTNAME, parentTestName, logger); + + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + String pid = bean.getName(); + SocketHelper.writePid(parentTestName, pid); + + logger.log(Level.INFO, "Begin"); + logger.log(Level.INFO, "Parent name: " + parentTestName); + logger.log(Level.INFO, "Server pid: " + pid); + logger.log(Level.INFO, "socket buffer connection opened"); + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT) { + logger.log(Level.SEVERE, "Socket-buffer not in expected Init state"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + } + + ServerSocket ser = new ServerSocket(port); + logger.log(Level.INFO, "Server will be listening on Port: " + port); + + /* + * Timeout after 7 sec if client does not connect + */ + try { + ser.setSoTimeout(7 * 1000); + + } catch (SocketException e) { + logger.log(Level.SEVERE, "Cannot set timeout!"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + logger.log(Level.INFO, "Waiting for client to connect"); + logger.log(Level.INFO, "Going to checkpoint"); + + try { + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + 
ser.close(); + System.exit(1); + } + /* + * Checkpoint when server is listening for connections, and no client has connected to the server. + */ + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_CHECKPOINT); + socket = ser.accept(); + SocketHelper.socketWaitForRestore(socketMappedBuffer, logger); + + } catch (Exception e) { + logger.log(Level.SEVERE, "Timed out while waiting for client to connect\n" + e); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + if (!ser.isBound()) { + logger.log(Level.SEVERE, "Server is not bound to a port"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + if (ser.getLocalPort() != port) { + logger.log(Level.SEVERE, "Server is not listening on correct port"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + BufferedReader br = new BufferedReader(new InputStreamReader(socket.getInputStream())); + PrintStream outstream = new PrintStream(socket.getOutputStream()); + + readMssg = br.readLine(); + logger.log(Level.INFO, "Read message 1: " + readMssg); + if (!msg1.equals(readMssg)) { + logger.log(Level.SEVERE, "Message 1 received was wrong,received: " + readMssg + " expected: " + msg1); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + logger.log(Level.INFO, "Sending message: " + msg2); + outstream.println(msg2); + + readMssg = br.readLine(); + logger.log(Level.INFO, "Read message 3: " + readMssg); + + if (!msg3.equals(readMssg)) { + logger.log(Level.SEVERE, "Message 3 received was wrong, received: " + readMssg + " expected: " + msg3); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + outstream.println(msg4); + logger.log(Level.INFO, "Sent message 4 " + msg4); + + socket.close(); + + /* + * Put Socket-MappedBuffer to state SocketHelper.STATE_SUCCESS telling the server process has ended successfully. 
+ */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + System.exit(1); + } else { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, SocketHelper.STATE_SUCCESS); + } + } catch (Exception exception) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + exception.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occurred:" + exception); + logger.log(Level.FINE, writer.toString()); + } + + if (socketMappedBuffer != null) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsData.java b/test/javaTests/src/org/criu/java/tests/SocketsData.java new file mode 100644 index 0000000000..67d8cef0e0 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsData.java @@ -0,0 +1,156 @@ +package org.criu.java.tests; + +import java.io.File; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsData { + static String TESTNAME = "SocketsData"; + + /** + * Runs the server and client processes, checkpoints the client process when its in the middle of data transfer + * + * @param args Not used + */ + public static void main(String[] args) { + MappedByteBuffer b = null, socketMappedBuffer = null; + FileChannel channel; + String pid; + Logger logger = null; + String port = "49200"; + try { + /* + * Mapped buffer 'b' to communicate between CheckpointRestore.java and this process. 
+ */ + File f = new File(Helper.MEMORY_MAPPED_FILE_NAME); + channel = FileChannel.open(f.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + b = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + pid = bean.getName(); + Helper.init(TESTNAME, pid, logger); + logger.log(Level.INFO, "Test init done; pid written to pid file; beginning with test"); + + if (b.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT) { + logger.log(Level.SEVERE, "Error: Error in memory mapping, test is not in init state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + /* + * Socket Mapped Buffer to communicate between server process, client process and this process. + */ + logger.log(Level.INFO, "Creating socketbufferfile and setting the init value of buffer"); + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + TESTNAME + "/SocketsDataFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + /* + * Set socketMappedBuffer to init state. + */ + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_INIT); + logger.log(Level.INFO, "Starting server and client process"); + ProcessBuilder builder = new ProcessBuilder("java", "-cp", "target/classes", Helper.PACKAGE_NAME + "." + "SocketsDataServer", TESTNAME, port); + Process serverProcess = builder.start(); + builder = new ProcessBuilder("java", "-cp", "target/classes", Helper.PACKAGE_NAME + "." 
+ "SocketsDataClient", TESTNAME, port); + Process clientProcess = builder.start(); + + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_INIT || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_LISTEN) { + ; + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "Some error took place in the client or server process: check their log for details"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "Exception occured in the client or server process: check their log for details"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_CHECKPOINT) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "State is not the expected 'to be checkpointed' state"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_CHECKPOINT) { + logger.log(Level.INFO, "Going to checkpoint client process"); + try { + Thread.sleep(10); + } catch (InterruptedException e) { + logger.log(Level.WARNING, "Thread was interrupted"); + } + SocketHelper.checkpointAndWait(b, logger, serverProcess, clientProcess); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_RESTORE); + logger.log(Level.INFO, "Process has been restored!"); + } + + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_RESTORE) { + ; + } + char 
bufchar = socketMappedBuffer.getChar(Helper.MAPPED_INDEX); + if (bufchar != Helper.STATE_FAIL && bufchar != Helper.STATE_PASS && bufchar != SocketHelper.STATE_SUCCESS) { + logger.log(Level.SEVERE, "Received wrong message from the child process: not the expected finish message"); + logger.log(Level.SEVERE, "Check their log files for more details"); + serverProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + logger.log(Level.SEVERE, "Error in the client or server process: check their log for details"); + serverProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_SUCCESS) { + ; + } + + /* + * Client process puts socketMappedBuffer to STATE_PASS if the test passed. + */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_PASS) { + logger.log(Level.INFO, Helper.PASS_MESSAGE); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + } else { + logger.log(Level.INFO, "Did not receive pass message from the client process"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + } catch (Exception e) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + e.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occured:" + e); + logger.log(Level.FINE, writer.toString()); + } + if (b != null) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + System.exit(5); + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsDataClient.java b/test/javaTests/src/org/criu/java/tests/SocketsDataClient.java new file mode 100644 index 0000000000..49885a8866 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsDataClient.java @@ -0,0 +1,141 @@ 
+package org.criu.java.tests; + +import java.io.*; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.net.Socket; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsDataClient { + static String TESTNAME = "SocketsDataClient"; + + public static void main(String[] args) { + MappedByteBuffer socketMappedBuffer = null; + FileChannel channel; + Socket socket = null; + String parentTestName, portArg; + int port; + Logger logger = null; + + try { + parentTestName = args[0]; + portArg = args[1]; + port = Integer.parseInt(portArg); + String msg1 = "Ch@ckM@$$@Ge!1", msg2 = "cH@C!m$SG!!2", + readMssg, msg3 = "@Ft@rCPM$$g3", msg4 = "Aft@rCPM$$g4"; + + /* + * Socket Mapped Buffer to communicate between server process, client process and the calling parent process. + */ + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/SocketsDataFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." 
+ TESTNAME); + SocketHelper.init(TESTNAME, parentTestName, logger); + + logger.log(Level.INFO, "Begin"); + logger.log(Level.INFO, "Parent name: " + parentTestName); + + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + String pid = bean.getName(); + + logger.log(Level.INFO, "Client pid: " + pid); + SocketHelper.writePid(parentTestName, pid); + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT && socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != SocketHelper.STATE_LISTEN) { + logger.log(Level.SEVERE, "Error: Socket-buffer not in expected Init state"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_INIT) { + ; + } + /* + * Socket Mapped Buffer should be in 'Server listening for connections' state + */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != SocketHelper.STATE_LISTEN) { + logger.log(Level.SEVERE, "socket-buffer not in expected state, current state: " + socketMappedBuffer.getChar(Helper.MAPPED_INDEX)); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + /* + * Server starts listening on port after putting the Mapped Buffer is in SocketHelper.STATE_LISTEN state + */ + logger.log(Level.INFO, "Client socket sending req to server at IP: 127.0.0.1 port:" + port); + + try { + socket = new Socket(SocketHelper.IP_ADDRESS, port); + } catch (IOException e) { + logger.log(Level.SEVERE, "Exception occured when connecting to port: " + e); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + PrintStream out = new PrintStream(socket.getOutputStream()); + BufferedReader br = new BufferedReader(new InputStreamReader(socket.getInputStream())); + + logger.log(Level.INFO, "Sending message to server " + msg1); + out.println(msg1); + + readMssg = br.readLine(); + logger.log(Level.INFO, "message received from server " + readMssg); + if 
(!msg2.equals(readMssg)) { + logger.log(Level.SEVERE, "wrong message received; Expected " + msg2); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + /* + * Checkpoints and wait for Restore + */ + logger.log(Level.INFO, "Going to checkpoint"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_CHECKPOINT); + SocketHelper.socketWaitForRestore(socketMappedBuffer, logger); + + logger.log(Level.INFO, "Sending message to server " + msg3); + out.println(msg3); + + readMssg = br.readLine(); + logger.log(Level.INFO, "message received from server " + readMssg); + if (!msg4.equals(readMssg)) { + logger.log(Level.SEVERE, "wrong message received; Expected " + msg2); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + socket.close(); + /* + * Wait for server process to end. + */ + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_RESTORE) { + ; + } + /* + * Check the server process has ended successfully, if it was a success put Mapped Buffer to pass state, else to failed state + */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_SUCCESS) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + } else { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + } catch (Exception exception) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + exception.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occured:" + exception); + logger.log(Level.FINE, writer.toString()); + } + if (socketMappedBuffer != null) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsDataServer.java b/test/javaTests/src/org/criu/java/tests/SocketsDataServer.java new file mode 100644 index 0000000000..65fe92a9d9 --- /dev/null +++ 
b/test/javaTests/src/org/criu/java/tests/SocketsDataServer.java @@ -0,0 +1,124 @@ +package org.criu.java.tests; + +import java.io.*; +import java.net.ServerSocket; +import java.net.Socket; +import java.net.SocketException; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsDataServer { + static String TESTNAME = "SocketsDataServer"; + + public static void main(String[] args) { + MappedByteBuffer socketMappedBuffer = null; + FileChannel channel; + String parentTestName, portArg; + int port; + Socket socket = null; + Logger logger = null; + String msg1 = "Ch@ckM@$$@Ge!1", msg2 = "cH@C!m$SG!!2", + msg3 = "@Ft@rCPM$$g3", msg4 = "Aft@rCPM$$g4", readMssg; + + try { + parentTestName = args[0]; + portArg = args[1]; + port = Integer.parseInt(portArg); + + /* + * Socket Mapped Buffer to communicate between server process, client process and the calling parent process. + */ + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/SocketsDataFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." 
+ TESTNAME); + SocketHelper.init(TESTNAME, parentTestName, logger); + + logger.log(Level.INFO, "Begin"); + logger.log(Level.INFO, "Parent name: " + parentTestName); + logger.log(Level.INFO, "socket buffer connection opened"); + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT) { + logger.log(Level.SEVERE, "Socket-buffer not in expected Init state"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + ServerSocket ser = new ServerSocket(port); + logger.log(Level.INFO, "Server will be listening on Port " + port); + + /* + * Wait for 7 seconds for client to connect, else throw a timeout exception + */ + try { + ser.setSoTimeout(7 * 1000); + + } catch (SocketException e) { + logger.log(Level.SEVERE, "cannot set timeout"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + logger.log(Level.INFO, "Waiting for client to connect"); + /* + * Put Socket Mapped Buffer to SocketHelper.STATE_LISTEN state - server has bound to port and + * begin listening for connections. 
+ */ + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, SocketHelper.STATE_LISTEN); + socket = ser.accept(); + BufferedReader br = new BufferedReader(new InputStreamReader(socket.getInputStream())); + PrintStream outstream = new PrintStream(socket.getOutputStream()); + + readMssg = br.readLine(); + logger.log(Level.INFO, "Read message 1: " + readMssg); + + if (!msg1.equals(readMssg)) { + logger.log(Level.SEVERE, "Message 1 received was wrong:rec " + readMssg + " expected: " + msg1); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + logger.log(Level.INFO, "Sending message: " + msg2); + outstream.println(msg2); + + readMssg = br.readLine(); + logger.log(Level.INFO, "Read message 3: " + readMssg); + + if (!msg3.equals(readMssg)) { + logger.log(Level.SEVERE, "Message 3 received was wrong:rec " + readMssg + " expected: " + msg3); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + outstream.println(msg4); + logger.log(Level.INFO, "Sent message 4 " + msg4); + + socket.close(); + /* + * Put Socket-MappedBuffer to state SocketHelper.STATE_SUCCESS telling the server process has ended successfully. 
+ */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + System.exit(1); + } else { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, SocketHelper.STATE_SUCCESS); + } + } catch (Exception exception) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + exception.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occurred:" + exception); + logger.log(Level.FINE, writer.toString()); + } + + if (socketMappedBuffer != null) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsListen.java b/test/javaTests/src/org/criu/java/tests/SocketsListen.java new file mode 100644 index 0000000000..3fad385493 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsListen.java @@ -0,0 +1,153 @@ +package org.criu.java.tests; + +import java.io.File; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsListen { + static String TESTNAME = "SocketsListen"; + + /** + * Runs the client and server process, checkpoints the server process when the server has bound to a port, but has not yet started listening + * + * @param args Not used + */ + public static void main(String[] args) { + MappedByteBuffer b = null, socketMappedBuffer = null; + FileChannel channel; + String pid; + String port = "49200"; + Logger logger = null; + try { + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." 
+ TESTNAME); + /* + * Mapped buffer 'b' to communicate between CheckpointRestore.java and this process. + */ + File f = new File(Helper.MEMORY_MAPPED_FILE_NAME); + channel = FileChannel.open(f.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + b = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + pid = bean.getName(); + Helper.init(TESTNAME, pid, logger); + logger.log(Level.INFO, "Test init done; pid written to pid file; beginning with test"); + + + if (b.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT) { + logger.log(Level.SEVERE, "Error: Error in memory mapping, test is not in init state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Creating socketbufferfile and setting the init value of buffer"); + + /* + * Socket Mapped Buffer to communicate between server process, client process and this process. + */ + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + TESTNAME + "/SocketsListenFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + /* + * Set socketMappedBuffer to init state. + */ + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_INIT); + + logger.log(Level.INFO, "Starting server and client process"); + ProcessBuilder builder = new ProcessBuilder("java", "-cp", "target/classes", Helper.PACKAGE_NAME + "." + "SocketsListenServer", TESTNAME, port); + Process serverProcess = builder.start(); + builder = new ProcessBuilder("java", "-cp", "target/classes", Helper.PACKAGE_NAME + "." 
+ "SocketsListenClient", TESTNAME, port); + Process clientProcess = builder.start(); + + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_INIT) { + ; + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "Some error took place in the client or server process: check their log for details"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "Exception occured in the client or server process: check their log for details"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_CHECKPOINT) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "State is not the expected 'to be checkpointed' state"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_CHECKPOINT) { + logger.log(Level.INFO, "Going to checkpoint server process"); + SocketHelper.checkpointAndWait(b, logger, serverProcess, clientProcess); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_RESTORE); + logger.log(Level.INFO, "Process has been restored!"); + } + + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_RESTORE || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_LISTEN) { + ; + } + char bufchar = socketMappedBuffer.getChar(Helper.MAPPED_INDEX); + if (bufchar != Helper.STATE_FAIL && bufchar != 
Helper.STATE_PASS && bufchar != SocketHelper.STATE_SUCCESS) { + logger.log(Level.SEVERE, "Received wrong message from the child process: not the expected finish message"); + logger.log(Level.SEVERE, "Check their log files for more details"); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + logger.log(Level.SEVERE, "Error in the client or server process: check their log for details"); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_SUCCESS) { + ; + } + + /* + * Client process puts socketMappedBuffer to Helper.STATE_PASS-Pass state if the test passed. + */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_PASS) { + logger.log(Level.INFO, Helper.PASS_MESSAGE); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + } else { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + System.exit(0); + + } catch (Exception e) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + e.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occured:" + e); + logger.log(Level.FINE, writer.toString()); + } + if (b != null) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + System.exit(5); + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsListenClient.java b/test/javaTests/src/org/criu/java/tests/SocketsListenClient.java new file mode 100644 index 0000000000..efcb3d545a --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsListenClient.java @@ -0,0 +1,136 @@ +package org.criu.java.tests; + +import java.io.*; +import java.net.Socket; +import java.nio.MappedByteBuffer; +import 
java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsListenClient { + static String TESTNAME = "SocketsListenClient"; + + public static void main(String[] args) { + MappedByteBuffer socketMappedBuffer = null; + FileChannel channel; + Socket socket = null; + String parentTestName, portArg; + int port; + Logger logger = null; + try { + parentTestName = args[0]; + portArg = args[1]; + port = Integer.parseInt(portArg); + String msg1 = "Ch@ckM@$$@Ge!1", msg2 = "cH@C!m$SG!!2", readMssg, + msg3 = "@Ft@rCPM$$g3", msg4 = "Aft@rCPM$$g4"; + + /* + * Socket Mapped Buffer to communicate between server process, client process and the calling parent process. + */ + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/SocketsListenFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." 
+ TESTNAME); + SocketHelper.init(TESTNAME, parentTestName, logger); + + logger.log(Level.INFO, "Begin"); + logger.log(Level.INFO, "Parent name: " + parentTestName); + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT && socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_CHECKPOINT && socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_RESTORE && socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != SocketHelper.STATE_LISTEN) { + logger.log(Level.SEVERE, "Error: Socket-buffer not in expected Init state"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Waiting for CR"); + + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_INIT || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_CHECKPOINT) { + ; + } + + logger.log(Level.INFO, "Restored"); + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_RESTORE) { + ; + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != SocketHelper.STATE_LISTEN) { + logger.log(Level.SEVERE, "Buffer does not contain the expected 'server bound to port' state" + socketMappedBuffer.getChar(Helper.MAPPED_INDEX)); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + /* + * Make the thread sleep to ensure server is listening on the port for client connections. 
+ */ + logger.log(Level.INFO, "Put thread to sleep"); + try { + Thread.sleep(10); + } catch (InterruptedException e) { + logger.log(Level.WARNING, "Thread was interuptedp"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + logger.log(Level.INFO, "Client socket sending req to server at IP: 127.0.0.1 port:" + port); + try { + socket = new Socket(SocketHelper.IP_ADDRESS, port); + } catch (Exception e) { + logger.log(Level.SEVERE, "Exception occured when connecting to port: " + e); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + PrintStream out = new PrintStream(socket.getOutputStream()); + BufferedReader br = new BufferedReader(new InputStreamReader(socket.getInputStream())); + + logger.log(Level.INFO, "Sending message to server " + msg1); + out.println(msg1); + + readMssg = br.readLine(); + logger.log(Level.INFO, "message received from server " + readMssg); + if (!msg2.equals(readMssg)) { + logger.log(Level.SEVERE, "wrong message received; Expected " + msg2); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + logger.log(Level.INFO, "Sending message to server " + msg3); + out.println(msg3); + + readMssg = br.readLine(); + logger.log(Level.INFO, "message received from server " + readMssg); + if (!msg4.equals(readMssg)) { + logger.log(Level.SEVERE, "wrong message received; Expected " + msg4); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + socket.close(); + + /* + * Wait for server process to end. 
+ */ + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_LISTEN) { + ; + } + /* + * Check the server process has ended successfully, if it was a success put MappedBuffer to STATE_PASS, else to STATE_FAIL + */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_SUCCESS) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + } else { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + } catch (Exception exception) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + exception.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occured:" + exception); + logger.log(Level.FINE, writer.toString()); + } + + if (socketMappedBuffer != null) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsListenServer.java b/test/javaTests/src/org/criu/java/tests/SocketsListenServer.java new file mode 100644 index 0000000000..46fef40ecb --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsListenServer.java @@ -0,0 +1,160 @@ +package org.criu.java.tests; + +import java.io.*; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.net.ServerSocket; +import java.net.Socket; +import java.net.SocketException; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsListenServer { + static String TESTNAME = "SocketsListenServer"; + + public static void main(String[] args) { + MappedByteBuffer socketMappedBuffer = null; + FileChannel channel; + String parentTestName, portArg; + int port; + Logger logger = null; + Socket socket = null; + String readMssg, msg1 = 
"Ch@ckM@$$@Ge!1", msg2 = "cH@C!m$SG!!2", + msg3 = "@Ft@rCPM$$g3", msg4 = "Aft@rCPM$$g4"; + + try { + parentTestName = args[0]; + portArg = args[1]; + port = Integer.parseInt(portArg); + + /* + * Socket Mapped Buffer to communicate between server process, client process and the calling parent process. + */ + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/SocketsListenFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + SocketHelper.init(TESTNAME, parentTestName, logger); + + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + String pid = bean.getName(); + SocketHelper.writePid(parentTestName, pid); + + logger.log(Level.INFO, "Begin"); + logger.log(Level.INFO, "Parent name: " + parentTestName); + logger.log(Level.INFO, "Server pid: " + pid); + logger.log(Level.INFO, "socket buffer connection opened"); + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT) { + logger.log(Level.SEVERE, "Socket-buffer not in expected Init state"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + logger.log(Level.INFO, "Server will be listening on Port " + port); + ServerSocket ser = new ServerSocket(port); + /* + * Server has bound to a port but is not listening yet! + */ + logger.log(Level.INFO, "Going to checkpoint"); + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + ser.close(); + System.exit(1); + } + /* + * Checkpoint and wait for Restore. 
+ */ + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_CHECKPOINT); + SocketHelper.socketWaitForRestore(socketMappedBuffer, logger); + + if (!ser.isBound()) { + logger.log(Level.SEVERE, "Server is not bound to a port"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + if (ser.getLocalPort() != port) { + logger.log(Level.SEVERE, "SServer is not listening on correct port"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + /* + * Timeout after 5 sec if client does not connect + */ + try { + ser.setSoTimeout(5 * 1000); + + } catch (SocketException e) { + logger.log(Level.SEVERE, "cannot set timeout"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + try { + logger.log(Level.INFO, "Waiting for client to connect"); + /* + * Put Socket Mapped Buffer to SocketHelper.STATE_LISTEN state - server has bound to port and + * will begin listening for connections. + */ + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, SocketHelper.STATE_LISTEN); + socket = ser.accept(); + + } catch (Exception e) { + logger.log(Level.SEVERE, "Timed out while waiting for client to connect\n" + e); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + BufferedReader br = new BufferedReader(new InputStreamReader(socket.getInputStream())); + PrintStream outstream = new PrintStream(socket.getOutputStream()); + + readMssg = br.readLine(); + logger.log(Level.INFO, "Read message 1: " + readMssg); + if (!msg1.equals(readMssg)) { + logger.log(Level.SEVERE, "Message 1 received was wrong:rec " + readMssg + " expected: " + msg1); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + logger.log(Level.INFO, "Sending message: " + msg2); + outstream.println(msg2); + + readMssg = br.readLine(); + logger.log(Level.INFO, "Read message 3: " + readMssg); + + if (!msg3.equals(readMssg)) { + 
logger.log(Level.SEVERE, "Message 3 received was wrong:rec " + readMssg + " expected: " + msg3); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + outstream.println(msg4); + logger.log(Level.INFO, "Sending message: " + msg4); + + /* + * Put Socket-MappedBuffer to state SocketHelper.STATE_SUCCESS telling the server process has ended successfully. + */ + socket.close(); + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + System.exit(1); + } else { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, SocketHelper.STATE_SUCCESS); + } + } catch (Exception exception) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + exception.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occurred:" + exception); + logger.log(Level.FINE, writer.toString()); + } + if (socketMappedBuffer != null) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsMultiple.java b/test/javaTests/src/org/criu/java/tests/SocketsMultiple.java new file mode 100644 index 0000000000..5e55c42741 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsMultiple.java @@ -0,0 +1,152 @@ +package org.criu.java.tests; + +import java.io.File; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsMultiple { + static String TESTNAME = "SocketsMultiple"; + + /** + * Runs the Client and Server Processes, Multiple clients connect to server Process, 
checkpoints the server process + * + * @param args Not used + */ + public static void main(String[] args) { + MappedByteBuffer b = null, socketMappedBuffer = null; + FileChannel channel; + String pid; + String port = "49200"; + Logger logger = null; + try { + /* + * Mapped buffer 'b' to communicate between CheckpointRestore.java and this process. + */ + File f = new File(Helper.MEMORY_MAPPED_FILE_NAME); + channel = FileChannel.open(f.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + b = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + pid = bean.getName(); + Helper.init(TESTNAME, pid, logger); + logger.log(Level.INFO, "Test init done; pid written to pid file; beginning with test"); + + if (b.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT) { + logger.log(Level.SEVERE, "Error: Error in memory mapping, test is not in init state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + /* + * Socket Mapped Buffer to communicate between server process, client process and this process. + */ + logger.log(Level.INFO, "Creating socketBufferFile and setting the init value of buffer"); + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + TESTNAME + "/SocketsMultipleFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + /* + * Set socketMappedBuffer to init state. + */ + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_INIT); + + logger.log(Level.INFO, "Starting server and client process"); + ProcessBuilder builder = new ProcessBuilder("java", "-cp", "target/classes", Helper.PACKAGE_NAME + "." 
+ "SocketsMultipleServer", TESTNAME, port); + Process serverProcess = builder.start(); + builder = new ProcessBuilder("java", "-cp", "target/classes", Helper.PACKAGE_NAME + "." + "SocketsMultipleClient", TESTNAME, port); + Process clientProcess = builder.start(); + + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_INIT || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_LISTEN) { + ; + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "Some error took place in the client or server process: check their log for details"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "Exception occured in the client or server process: check their log for details"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_CHECKPOINT) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "State is not the expected 'to be checkpointed' state"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_CHECKPOINT) { + logger.log(Level.INFO, "Going to checkpoint server process"); + SocketHelper.checkpointAndWait(b, logger, serverProcess, clientProcess); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_RESTORE); + logger.log(Level.INFO, "Process has been restored!"); + } + + while 
(socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_RESTORE) { + ; + } + char bufchar = socketMappedBuffer.getChar(Helper.MAPPED_INDEX); + if (bufchar != Helper.STATE_FAIL && bufchar != Helper.STATE_PASS && bufchar != SocketHelper.STATE_SUCCESS) { + logger.log(Level.SEVERE, "Received wrong message from the child process: not the expected finish message"); + logger.log(Level.SEVERE, "Check their log files for more details"); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + logger.log(Level.SEVERE, "Error in the client or server process: check their log for details"); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_SUCCESS) { + ; + } + + /* + * Client process puts socketMappedBuffer to STATE_PASS state if the test passed. 
+ */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_PASS) { + logger.log(Level.INFO, Helper.PASS_MESSAGE); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + } else { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + System.exit(0); + + } catch (Exception e) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + e.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occured:" + e); + logger.log(Level.FINE, writer.toString()); + } + if (b != null) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + System.exit(5); + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsMultipleClient.java b/test/javaTests/src/org/criu/java/tests/SocketsMultipleClient.java new file mode 100644 index 0000000000..d97a946fd2 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsMultipleClient.java @@ -0,0 +1,174 @@ +package org.criu.java.tests; + +import java.io.*; +import java.net.Socket; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsMultipleClient { + static String TESTNAME = "SocketsMultipleClient"; + + public static void main(String[] args) { + MappedByteBuffer socketMappedBuffer = null; + FileChannel channel; + String msg1 = "Message1", msg2 = "Message2", readMssg; + Socket socket1 = null, socket2 = null, socket3 = null, socket4 = null; + String parentTestName, portArg; + int port; + Logger logger = null; + + try { + parentTestName = args[0]; + portArg = args[1]; + port = Integer.parseInt(portArg); + + /* + * Socket Mapped Buffer to communicate between server process, client process and the calling parent process. 
+ */ + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/SocketsMultipleFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + SocketHelper.init(TESTNAME, parentTestName, logger); + + logger.log(Level.INFO, "Begin"); + logger.log(Level.INFO, "Parent name: " + parentTestName); + + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_INIT) { + ; + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != SocketHelper.STATE_LISTEN) { + logger.log(Level.SEVERE, "Error: Socket-buffer not in expected state"); + + } + try { + logger.log(Level.INFO, "client 1 connecting..."); + socket1 = new Socket(SocketHelper.IP_ADDRESS, port); + } catch (Exception e) { + logger.log(Level.SEVERE, "Exception when client connects to server: " + e); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + } + logger.log(Level.INFO, "Client 1 connected to server successfully"); + PrintStream out1 = new PrintStream(socket1.getOutputStream()); + BufferedReader br1 = new BufferedReader(new InputStreamReader(socket1.getInputStream())); + logger.log(Level.INFO, "Got input and output streams for socket1"); + try { + logger.log(Level.INFO, "client 2 connecting..."); + socket2 = new Socket(SocketHelper.IP_ADDRESS, port); + } catch (Exception e) { + logger.log(Level.SEVERE, "Exception when client connects to server: " + e); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + } + logger.log(Level.INFO, "Client 2 connected to server successfully"); + PrintStream out2 = new PrintStream(socket2.getOutputStream()); + BufferedReader br2 = new BufferedReader(new InputStreamReader(socket2.getInputStream())); + logger.log(Level.INFO, "Got input and output streams for 
socket2"); + + try { + logger.log(Level.INFO, "client 3 connecting..."); + socket3 = new Socket(SocketHelper.IP_ADDRESS, port); + } catch (Exception e) { + logger.log(Level.SEVERE, "Exception when client connects to server: " + e); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + } + logger.log(Level.INFO, "Client 3 connected to server successfully"); + PrintStream out3 = new PrintStream(socket3.getOutputStream()); + BufferedReader br3 = new BufferedReader(new InputStreamReader(socket3.getInputStream())); + logger.log(Level.INFO, "Got input and output streams for socket3"); + + out1.println(msg1); + + readMssg = br1.readLine(); + if (!msg2.equals(readMssg)) { + logger.log(Level.SEVERE, "wrong message received; Received: " + readMssg); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + } + socket1.close(); + + out2.println(msg1); + + /* + * Wait for Checkpoint-Restore + */ + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_INIT || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_LISTEN || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_CHECKPOINT) { + ; + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_RESTORE) { + logger.log(Level.SEVERE, "Socket-mapped-buffer is not in restored state"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + logger.log(Level.INFO, "Server is Restored!!"); + + out3.println(msg1); + readMssg = br2.readLine(); + if (!msg2.equals(readMssg)) { + logger.log(Level.SEVERE, "wrong message received by client 2; Received: " + readMssg); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + readMssg = br3.readLine(); + if (!msg2.equals(readMssg)) { + logger.log(Level.SEVERE, "wrong message received by client 3; Received: " + readMssg); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + 
socket2.close(); + socket3.close(); + + try { + logger.log(Level.INFO, "client 4 connecting..."); + socket4 = new Socket(SocketHelper.IP_ADDRESS, port); + } catch (Exception e) { + logger.log(Level.SEVERE, "Exception when client connects to server: " + e); + } + logger.log(Level.INFO, "Client 4 connected to server successfully"); + PrintStream out4 = new PrintStream(socket4.getOutputStream()); + BufferedReader br4 = new BufferedReader(new InputStreamReader(socket4.getInputStream())); + logger.log(Level.INFO, "Got input and output streams for socket4"); + + out4.println(msg1); + readMssg = br4.readLine(); + if (!msg2.equals(readMssg)) { + logger.log(Level.SEVERE, "wrong message received by client 4; Received: " + readMssg); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + socket4.close(); + /* + * Wait for server process to end. + */ + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_RESTORE) { + ; + } + /* + * Check the server process has ended successfully, if it was a success put Mapped Buffer to STATE_PASS, else to STATE_FAIL + */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_SUCCESS) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + } else { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + } catch (Exception exception) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + exception.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occured:" + exception); + logger.log(Level.FINE, writer.toString()); + } + + if (socketMappedBuffer != null) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsMultipleServer.java b/test/javaTests/src/org/criu/java/tests/SocketsMultipleServer.java new file mode 100644 index 0000000000..a7e4d3b9ef --- 
/dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsMultipleServer.java @@ -0,0 +1,215 @@ +package org.criu.java.tests; + +import java.io.*; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.net.ServerSocket; +import java.net.Socket; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsMultipleServer { + static String TESTNAME = "SocketsMultipleServer"; + + public static void main(String[] args) { + MappedByteBuffer socketMappedBuffer = null; + FileChannel channel; + String parentTestName, portArg; + int port; + Logger logger = null; + try { + parentTestName = args[0]; + portArg = args[1]; + port = Integer.parseInt(portArg); + + /* + * Socket Mapped Buffer to communicate between server process, client process and the calling parent process. + */ + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/SocketsMultipleFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." 
+ TESTNAME); + SocketHelper.init(TESTNAME, parentTestName, logger); + + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + String pid = bean.getName(); + SocketHelper.writePid(parentTestName, pid); + + logger.log(Level.INFO, "Begin"); + logger.log(Level.INFO, "Parent name: " + parentTestName); + logger.log(Level.INFO, "Server pid: " + pid); + logger.log(Level.INFO, "socket buffer connection opened"); + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT) { + logger.log(Level.SEVERE, "Socket-buffer not in expected Init state"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + /* + * The array indexes 3, 5, 7 and 9 will map the state of client 1, 2, 3 and 4. + * Set these array indexes to init state. + */ + + socketMappedBuffer.putChar(3, Helper.STATE_INIT); + socketMappedBuffer.putChar(5, Helper.STATE_INIT); + socketMappedBuffer.putChar(7, Helper.STATE_INIT); + socketMappedBuffer.putChar(9, Helper.STATE_INIT); + + ServerSocket ser = new ServerSocket(port); + logger.log(Level.INFO, "Server will be listening on Port " + port); + + Socket[] sockets = new Socket[4]; + + /* + * Set the SocketMappedBuffer to S state-server will be listening for connections + */ + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, SocketHelper.STATE_LISTEN); + + for (int i = 1; i <= 4; i++) { + sockets[i - 1] = ser.accept(); + ServerThread serverThread = new ServerThread(sockets[i - 1], "ser-socket " + i, 2 * i + 1, logger, socketMappedBuffer); + serverThread.start(); + if (i == 3) { + logger.log(Level.INFO, "Connected to client: 3"); + /* + * Client 3 has connected, wait for thread 1 to finish and then checkpoint. 
+ */ + while (socketMappedBuffer.getChar(3) != Helper.STATE_FAIL && socketMappedBuffer.getChar(3) != Helper.STATE_PASS) { + ; + } + logger.log(Level.INFO, "Going to checkpoint"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_CHECKPOINT); + SocketHelper.socketWaitForRestore(socketMappedBuffer, logger); + } + } + + /* + * Loop while any of the 4 thread is running + */ + while (socketMappedBuffer.getChar(3) == Helper.STATE_INIT || socketMappedBuffer.getChar(5) == Helper.STATE_INIT + || socketMappedBuffer.getChar(7) == Helper.STATE_INIT || socketMappedBuffer.getChar(9) == Helper.STATE_INIT) { + ; + } + + /* + * Check Socket Mapped Buffer for a thread that failed + */ + for (int i = 1; i <= 4; i++) { + if (socketMappedBuffer.getChar(i * 2 + 1) == Helper.STATE_FAIL) { + logger.log(Level.SEVERE, "Error in thread connected to client " + i); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + } + + /* + * Check the 1st Socket is closed + */ + if (!sockets[0].isClosed()) { + logger.log(Level.SEVERE, "socket 1 is not closed"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + logger.log(Level.INFO, "Socket 1 is in expected closed state: " + sockets[0].isClosed()); + + /* + * Check all threads are in expected pass state + */ + for (int i = 1; i <= 4; i++) { + if (socketMappedBuffer.getChar(i * 2 + 1) != Helper.STATE_PASS) { + logger.log(Level.SEVERE, "Unexpected State of buffer: " + socketMappedBuffer.getChar(i * 2 + 1) + ", client: " + i); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + } + logger.log(Level.INFO, "Done"); + + /* + * Put Socket-MappedBuffer to state SocketHelper.STATE_SUCCESS telling the server process has ended successfully. 
+ */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + System.exit(1); + } else { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, SocketHelper.STATE_SUCCESS); + } + + } catch (Exception exception) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + exception.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occurred:" + exception); + logger.log(Level.FINE, writer.toString()); + } + + if (socketMappedBuffer != null) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + } + } +} + +class ServerThread extends Thread { + Socket socket = null; + String name; + int num; + MappedByteBuffer socketMappedBuffer; + Logger logger; + + ServerThread(Socket socket, String name, int num, Logger logger, MappedByteBuffer socketMappedBuffer) { + this.socket = socket; + this.name = name; + this.logger = logger; + this.num = num; + this.socketMappedBuffer = socketMappedBuffer; + } + + public void run() { + try { + String readMssg, msg1 = "Message1", msg2 = "Message2"; + BufferedReader br = new BufferedReader(new InputStreamReader(socket.getInputStream())); + PrintStream out = new PrintStream(socket.getOutputStream()); + readMssg = br.readLine(); + if (!msg1.equals(readMssg)) { + logger.log(Level.SEVERE, "Message read by thread " + name + " was not 'Message1', received Message: " + readMssg); + socket.close(); + socketMappedBuffer.putChar(num, Helper.STATE_FAIL); + } else { + logger.log(Level.INFO, name + " received correct message"); + out.println(msg2); + logger.log(Level.INFO, name + " has sent message"); + socket.close(); + socketMappedBuffer.putChar(num, Helper.STATE_PASS); + } + + } catch (Exception exception) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + 
exception.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occurred in thread :" + name + " " + exception); + logger.log(Level.FINE, writer.toString()); + } + + try { + if (socket != null) { + socket.close(); + } + } catch (IOException e) { + ; + } + + /* + * If exception occurs fail the thread + */ + socketMappedBuffer.putChar(num, Helper.STATE_FAIL); + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsServer.java b/test/javaTests/src/org/criu/java/tests/SocketsServer.java new file mode 100644 index 0000000000..051233443d --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsServer.java @@ -0,0 +1,142 @@ +package org.criu.java.tests; + +import java.io.*; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.net.ServerSocket; +import java.net.Socket; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsServer { + static String TESTNAME = "SocketsServer"; + + public static void main(String[] args) { + MappedByteBuffer socketMappedBuffer = null; + String msg1 = "Ch@ckM@$$@Ge!1", msg2 = "cH@C!m$SG!!2", + msg3 = "@Ft@rCPM$$g3", msg4 = "Aft@rCPM$$g4", readMssg; + FileChannel channel; + String parentTestName, portArg; + int port; + Logger logger = null; + + try { + parentTestName = args[0]; + portArg = args[1]; + port = Integer.parseInt(portArg); + + /* + * Socket Mapped Buffer to communicate between server process, client process and the calling parent process. 
+ */ + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/SocketsFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + + SocketHelper.init(TESTNAME, parentTestName, logger); + logger.log(Level.INFO, "Begin"); + logger.log(Level.INFO, "Parent name: " + parentTestName); + + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + String pid = bean.getName(); + SocketHelper.writePid(parentTestName, pid); + + logger.log(Level.INFO, "Socket buffer mapped"); + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT) { + logger.log(Level.SEVERE, "Socket-buffer not in expected Init state"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + } + + ServerSocket ser = new ServerSocket(port); + logger.log(Level.INFO, "Server will be listening on Port " + port); + + /* + * Timeout after 5 second if client does not connect + */ + ser.setSoTimeout(5 * 1000); + logger.log(Level.INFO, "Waiting for client to connect"); + Socket socket = null; + try { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, SocketHelper.STATE_LISTEN); + socket = ser.accept(); + } catch (Exception e) { + logger.log(Level.SEVERE, "Timed out while waiting for client to connect"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + } + BufferedReader br = new BufferedReader(new InputStreamReader(socket.getInputStream())); + PrintStream outstream = new PrintStream(socket.getOutputStream()); + + readMssg = br.readLine(); + logger.log(Level.INFO, "Read message 1: " + readMssg); + if (!msg1.equals(readMssg)) { + logger.log(Level.SEVERE, "Message 1 received was wrong:rec " + readMssg + " expected: " + msg1); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, 
Helper.STATE_END); + } + + logger.log(Level.INFO, "Sending message: " + msg2); + outstream.println(msg2); + + logger.log(Level.INFO, "Going to checkpoint"); + /* + * Put socket Mapped Buffer to 'to be checkpointed' state and wait for restore + */ + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_CHECKPOINT); + SocketHelper.socketWaitForRestore(socketMappedBuffer, logger); + + if (!ser.isBound()) { + logger.log(Level.SEVERE, "Server is not bound to a port"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + if (ser.getLocalPort() != port) { + logger.log(Level.SEVERE, "Server is not listening on correct port"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + readMssg = br.readLine(); + logger.log(Level.INFO, "Read message 3: " + readMssg); + + if (!msg3.equals(readMssg)) { + logger.log(Level.SEVERE, "Message 3 received was wrong:rec " + readMssg + " expected: " + msg3); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + socket.close(); + System.exit(1); + } + + outstream.println(msg4); + logger.log(Level.INFO, "Sent message 4 " + msg4); + + /* + * Put Socket-MappedBuffer to state SocketHelper.STATE_SUCCESS telling the server process has ended successfully. 
+ */ + socket.close(); + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + System.exit(1); + } else { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, SocketHelper.STATE_SUCCESS); + } + + } catch (Exception exception) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + exception.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occurred:" + exception); + logger.log(Level.FINE, writer.toString()); + } + + if (socketMappedBuffer != null) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + } + } +} diff --git a/test/javaTests/test.xml b/test/javaTests/test.xml index b73a31db29..4768bf1935 100644 --- a/test/javaTests/test.xml +++ b/test/javaTests/test.xml @@ -40,4 +40,50 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From 32f4f8bd0b12e832e315f49e654df93d7121f532 Mon Sep 17 00:00:00 2001 From: Nidhi Gupta Date: Thu, 9 Jan 2020 22:43:25 +0530 Subject: [PATCH 185/277] Switch open-j9 alpine tests to python3 Signed-off-by: Nidhi Gupta --- scripts/build/Dockerfile.openj9-alpine | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/build/Dockerfile.openj9-alpine b/scripts/build/Dockerfile.openj9-alpine index 43a9934446..39ea4d08e6 100644 --- a/scripts/build/Dockerfile.openj9-alpine +++ b/scripts/build/Dockerfile.openj9-alpine @@ -14,7 +14,7 @@ RUN apk update && apk add \ pkgconfig \ protobuf-c-dev \ protobuf-dev \ - python \ + python3 \ sudo \ maven \ ip6tables \ From 72925ccf807cb1bf02fedbdfc046036134b62336 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 14 Jan 2020 14:47:18 +0100 Subject: [PATCH 186/277] ppc64le: remove register '1' from clobber list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compiling 'criu-dev' on Fedora 31 gives two 
errors about wrong clobber lists: compel/include/uapi/compel/asm/sigframe.h:47:9: error: listing the stack pointer register ‘1’ in a clobber list is deprecated [-Werror=deprecated] criu/arch/ppc64/include/asm/restore.h:14:2: error: listing the stack pointer register ‘1’ in a clobber list is deprecated [-Werror=deprecated] There was also a bug report from Debian that CRIU does not build because of this. Each of these errors comes with the following note: note: the value of the stack pointer after an ‘asm’ statement must be the same as it was before the statement As far as I understand it this should not be a problem in this cases as the code never returns anyway. Running zdtm very seldom fails during 'zdtm/static/cgroup_ifpriomap' with a double free or corruption. This happens not very often and I cannot verify if it happens without this patch. As CRIU does not build without the patch. Signed-off-by: Adrian Reber --- compel/arch/ppc64/src/lib/include/uapi/asm/sigframe.h | 2 +- criu/arch/ppc64/include/asm/restore.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/compel/arch/ppc64/src/lib/include/uapi/asm/sigframe.h b/compel/arch/ppc64/src/lib/include/uapi/asm/sigframe.h index 9467a1b990..5c98b199de 100644 --- a/compel/arch/ppc64/src/lib/include/uapi/asm/sigframe.h +++ b/compel/arch/ppc64/src/lib/include/uapi/asm/sigframe.h @@ -50,7 +50,7 @@ struct rt_sigframe { "sc \n" \ : \ : "r"(new_sp) \ - : "1", "memory") + : "memory") #if _CALL_ELF != 2 # error Only supporting ABIv2. 
diff --git a/criu/arch/ppc64/include/asm/restore.h b/criu/arch/ppc64/include/asm/restore.h index 8d4516090c..f065ec3a0c 100644 --- a/criu/arch/ppc64/include/asm/restore.h +++ b/criu/arch/ppc64/include/asm/restore.h @@ -21,7 +21,7 @@ : "r"(new_sp), \ "r"((unsigned long)restore_task_exec_start), \ "r"(task_args) \ - : "1", "3", "12") + : "3", "12") /* There is nothing to do since TLS is accessed through r13 */ #define core_get_tls(pcore, ptls) From 2121d2120e367b38fce06bff40fc83a986ac66d3 Mon Sep 17 00:00:00 2001 From: Valeriy Vdovin Date: Fri, 10 Jan 2020 15:57:50 +0300 Subject: [PATCH 187/277] image: core -- Reserve start_time field To ensure consistency of runtime environment processes within a container need to see same start time values over suspend/resume cycles. We introduce new field to the core image structure to store start time of a dumped process. Later same value would be restored to a newly created task. In future the feature is likely to be pulled here, so we reserve field id in protobuf descriptor. Signed-off-by: Valeriy Vdovin --- images/core.proto | 2 ++ 1 file changed, 2 insertions(+) diff --git a/images/core.proto b/images/core.proto index c3dba6f6d7..e90522914e 100644 --- a/images/core.proto +++ b/images/core.proto @@ -53,6 +53,8 @@ message task_core_entry { //optional int32 tty_pgrp = 17; optional bool child_subreaper = 18; + // Reserved for container relative start time + //optional uint64 start_time = 19; } message task_kobj_ids_entry { From 7e7b6475a3159d58ab7d5860d21ef680f4385e9b Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Thu, 9 Jan 2020 18:54:50 +0000 Subject: [PATCH 188/277] travis: reduce the number of podman tests We are running each podman test loop 50 times. This takes more than 20 minutes in Travis. Reduce both test loops to only run 20 times. 
Signed-off-by: Adrian Reber --- scripts/travis/podman-test.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/travis/podman-test.sh b/scripts/travis/podman-test.sh index 5189477cd6..825bca746e 100755 --- a/scripts/travis/podman-test.sh +++ b/scripts/travis/podman-test.sh @@ -35,7 +35,7 @@ criu --version podman run --name cr -d docker.io/library/alpine /bin/sh -c 'i=0; while true; do echo $i; i=$(expr $i + 1); sleep 1; done' sleep 1 -for i in `seq 50`; do +for i in `seq 20`; do echo "Test $i for podman container checkpoint" podman exec cr ps axf podman logs cr @@ -48,7 +48,7 @@ for i in `seq 50`; do podman logs cr done -for i in `seq 50`; do +for i in `seq 20`; do echo "Test $i for podman container checkpoint --export" podman ps -a podman exec cr ps axf From bde7beaf83b12e8c4e6b9323eb9c5bd592570a6c Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Sun, 15 Dec 2019 20:38:46 +0000 Subject: [PATCH 189/277] kerndat: detect if system support clone3() with set_tid Linux kernel 5.4 extends clone3() with set_tid to allow processes to specify the PID of a newly created process. This introduces detection of the clone3() syscall and if set_tid is supported. This first implementation is X86_64 only. 
Signed-off-by: Adrian Reber --- .../arch/arm/plugins/std/syscalls/syscall.def | 1 + .../plugins/std/syscalls/syscall-ppc64.tbl | 1 + .../plugins/std/syscalls/syscall-s390.tbl | 1 + .../x86/plugins/std/syscalls/syscall_32.tbl | 1 + .../x86/plugins/std/syscalls/syscall_64.tbl | 1 + .../plugins/include/uapi/std/syscall-types.h | 1 + criu/cr-check.c | 12 ++++++ criu/include/kerndat.h | 1 + criu/include/sched.h | 33 +++++++++++++++ criu/kerndat.c | 41 +++++++++++++++++++ 10 files changed, 93 insertions(+) create mode 100644 criu/include/sched.h diff --git a/compel/arch/arm/plugins/std/syscalls/syscall.def b/compel/arch/arm/plugins/std/syscalls/syscall.def index d5bdc677e2..f7ebc85278 100644 --- a/compel/arch/arm/plugins/std/syscalls/syscall.def +++ b/compel/arch/arm/plugins/std/syscalls/syscall.def @@ -115,3 +115,4 @@ ppoll 73 336 (struct pollfd *fds, unsigned int nfds, const struct timespec *t fsopen 430 430 (char *fsname, unsigned int flags) fsconfig 431 431 (int fd, unsigned int cmd, const char *key, const char *value, int aux) fsmount 432 432 (int fd, unsigned int flags, unsigned int attr_flags) +clone3 435 435 (struct clone_args *uargs, size_t size) diff --git a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl index 4e283d5e93..1afaf1e704 100644 --- a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl +++ b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl @@ -111,3 +111,4 @@ __NR_ppoll 281 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct __NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags) __NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux) __NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags) +__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size) diff --git a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl 
index fd48e39507..ae6fdb5f8d 100644 --- a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl +++ b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl @@ -111,3 +111,4 @@ __NR_ppoll 302 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct __NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags) __NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux) __NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags) +__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size) diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl index 038aeb4f75..7a487110d9 100644 --- a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl +++ b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl @@ -99,3 +99,4 @@ __NR_ppoll 309 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct __NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags) __NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux) __NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags) +__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size) diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl index 215f320267..6667c07db7 100644 --- a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl +++ b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl @@ -110,3 +110,4 @@ __NR_ppoll 271 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struc __NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags) __NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux) __NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags) +__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size) diff --git a/compel/plugins/include/uapi/std/syscall-types.h 
b/compel/plugins/include/uapi/std/syscall-types.h index 57865e7413..031e773bb6 100644 --- a/compel/plugins/include/uapi/std/syscall-types.h +++ b/compel/plugins/include/uapi/std/syscall-types.h @@ -39,6 +39,7 @@ struct msghdr; struct rusage; struct iocb; struct pollfd; +struct clone_args; typedef unsigned long aio_context_t; diff --git a/criu/cr-check.c b/criu/cr-check.c index 17dd29b42e..80df3f7cdc 100644 --- a/criu/cr-check.c +++ b/criu/cr-check.c @@ -1224,6 +1224,16 @@ static int check_uffd_noncoop(void) return 0; } +static int check_clone3_set_tid(void) +{ + if (!kdat.has_clone3_set_tid) { + pr_warn("clone3() with set_tid not supported\n"); + return -1; + } + + return 0; +} + static int check_can_map_vdso(void) { if (kdat_can_map_vdso() == 1) @@ -1373,6 +1383,7 @@ int cr_check(void) ret |= check_sk_netns(); ret |= check_kcmp_epoll(); ret |= check_net_diag_raw(); + ret |= check_clone3_set_tid(); } /* @@ -1476,6 +1487,7 @@ static struct feature_list feature_list[] = { { "link_nsid", check_link_nsid}, { "kcmp_epoll", check_kcmp_epoll}, { "external_net_ns", check_external_net_ns}, + { "clone3_set_tid", check_clone3_set_tid}, { NULL, NULL }, }; diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h index 771195860c..27c870bb86 100644 --- a/criu/include/kerndat.h +++ b/criu/include/kerndat.h @@ -66,6 +66,7 @@ struct kerndat_s { bool has_inotify_setnextwd; bool has_kcmp_epoll_tfd; bool has_fsopen; + bool has_clone3_set_tid; }; extern struct kerndat_s kdat; diff --git a/criu/include/sched.h b/criu/include/sched.h new file mode 100644 index 0000000000..78f65e3b7e --- /dev/null +++ b/criu/include/sched.h @@ -0,0 +1,33 @@ +#ifndef __CR_SCHED_H__ +#define __CR_SCHED_H__ + +#include + +#ifndef ptr_to_u64 +#define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr))) +#endif +#ifndef u64_to_ptr +#define u64_to_ptr(x) ((void *)(uintptr_t)x) +#endif + +/* + * This structure is needed by clone3(). The kernel + * calls it 'struct clone_args'. 
As CRIU will always + * need at least this part of the structure (VER1) + * to be able to test if clone3() with set_tid works, + * the structure is defined here as 'struct _clone_args'. + */ + +struct _clone_args { + __aligned_u64 flags; + __aligned_u64 pidfd; + __aligned_u64 child_tid; + __aligned_u64 parent_tid; + __aligned_u64 exit_signal; + __aligned_u64 stack; + __aligned_u64 stack_size; + __aligned_u64 tls; + __aligned_u64 set_tid; + __aligned_u64 set_tid_size; +}; +#endif /* __CR_SCHED_H__ */ diff --git a/criu/kerndat.c b/criu/kerndat.c index d1afde71d3..0772828bc1 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -41,6 +41,7 @@ #include "uffd.h" #include "vdso.h" #include "kcmp.h" +#include "sched.h" struct kerndat_s kdat = { }; @@ -986,6 +987,44 @@ static int kerndat_tun_netns(void) return check_tun_netns_cr(&kdat.tun_ns); } +static bool kerndat_has_clone3_set_tid(void) +{ + pid_t pid; + struct _clone_args args = {}; + +#ifndef CONFIG_X86_64 + /* + * Currently the CRIU PIE assembler clone3() wrapper is + * only implemented for X86_64. + */ + kdat.has_clone3_set_tid = false; + return 0; +#endif + + args.set_tid = -1; + /* + * On a system without clone3() this will return ENOSYS. + * On a system with clone3() but without set_tid this + * will return E2BIG. + * On a system with clone3() and set_tid it will return + * EINVAL. 
+ */ + pid = syscall(__NR_clone3, &args, sizeof(args)); + + if (pid == -1 && (errno == ENOSYS || errno == E2BIG)) { + kdat.has_clone3_set_tid = false; + return 0; + } + if (pid == -1 && errno == EINVAL) { + kdat.has_clone3_set_tid = true; + } else { + pr_perror("Unexpected error from clone3\n"); + return -1; + } + + return 0; +} + int kerndat_init(void) { int ret; @@ -1059,6 +1098,8 @@ int kerndat_init(void) ret = has_kcmp_epoll_tfd(); if (!ret) ret = kerndat_has_fsopen(); + if (!ret) + ret = kerndat_has_clone3_set_tid(); kerndat_lsm(); kerndat_mmap_min_addr(); From e97bc57d65944c0f79582caa2520b8bf9f22ef73 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Mon, 16 Dec 2019 07:57:03 +0000 Subject: [PATCH 190/277] Add assembler wrapper for clone3() To create a new process/thread with a certain PID based on clone3() a new assembler wrapper is necessary as there is not glibc wrapper (yet). Signed-off-by: Adrian Reber --- criu/arch/aarch64/include/asm/restorer.h | 7 ++ criu/arch/arm/include/asm/restorer.h | 7 ++ criu/arch/ppc64/include/asm/restorer.h | 7 ++ criu/arch/s390/include/asm/restorer.h | 7 ++ criu/arch/x86/include/asm/restorer.h | 92 ++++++++++++++++++++++++ 5 files changed, 120 insertions(+) diff --git a/criu/arch/aarch64/include/asm/restorer.h b/criu/arch/aarch64/include/asm/restorer.h index f502cdcaf6..2fe58915be 100644 --- a/criu/arch/aarch64/include/asm/restorer.h +++ b/criu/arch/aarch64/include/asm/restorer.h @@ -42,6 +42,13 @@ "r"(&thread_args[i]) \ : "x0", "x1", "x2", "x3", "x8", "memory") +#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ + clone_restore_fn) do { \ + pr_err("This architecture does not support clone3() with set_tid, yet!\n"); \ + pr_err("Not creating a process with PID: %d\n", ((pid_t *)u64_to_ptr(clone_args.set_tid))[0]); \ + ret = -1; \ +} while (0) + #define ARCH_FAIL_CORE_RESTORE \ asm volatile( \ "mov sp, %0 \n" \ diff --git a/criu/arch/arm/include/asm/restorer.h b/criu/arch/arm/include/asm/restorer.h index 
217d920e84..ad4b58f93d 100644 --- a/criu/arch/arm/include/asm/restorer.h +++ b/criu/arch/arm/include/asm/restorer.h @@ -43,6 +43,13 @@ "r"(&thread_args[i]) \ : "r0", "r1", "r2", "r3", "r7", "memory") +#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ + clone_restore_fn) do { \ + pr_err("This architecture does not support clone3() with set_tid, yet!\n"); \ + pr_err("Not creating a process with PID: %d\n", ((pid_t *)u64_to_ptr(clone_args.set_tid))[0]); \ + ret = -1; \ +} while (0) + #define ARCH_FAIL_CORE_RESTORE \ asm volatile( \ "mov sp, %0 \n" \ diff --git a/criu/arch/ppc64/include/asm/restorer.h b/criu/arch/ppc64/include/asm/restorer.h index d48d833d6b..19bc3ea36e 100644 --- a/criu/arch/ppc64/include/asm/restorer.h +++ b/criu/arch/ppc64/include/asm/restorer.h @@ -48,6 +48,13 @@ "r"(&thread_args[i]) /* %6 */ \ : "memory","0","3","4","5","6","7","14","15") +#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ + clone_restore_fn) do { \ + pr_err("This architecture does not support clone3() with set_tid, yet!\n"); \ + pr_err("Not creating a process with PID: %d\n", ((pid_t *)u64_to_ptr(clone_args.set_tid))[0]); \ + ret = -1; \ +} while (0) + #define arch_map_vdso(map, compat) -1 int restore_gpregs(struct rt_sigframe *f, UserPpc64RegsEntry *r); diff --git a/criu/arch/s390/include/asm/restorer.h b/criu/arch/s390/include/asm/restorer.h index cfdefcab9b..733f2de339 100644 --- a/criu/arch/s390/include/asm/restorer.h +++ b/criu/arch/s390/include/asm/restorer.h @@ -39,6 +39,13 @@ "d"(&thread_args[i]) \ : "0", "1", "2", "3", "4", "5", "6", "cc", "memory") +#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ + clone_restore_fn) do { \ + pr_err("This architecture does not support clone3() with set_tid, yet!\n"); \ + pr_err("Not creating a process with PID: %d\n", ((pid_t *)u64_to_ptr(clone_args.set_tid))[0]); \ + ret = -1; \ +} while (0) + #define arch_map_vdso(map, compat) -1 int restore_gpregs(struct rt_sigframe *f, UserS390RegsEntry *r); diff 
--git a/criu/arch/x86/include/asm/restorer.h b/criu/arch/x86/include/asm/restorer.h index 25559b57c0..731477ec99 100644 --- a/criu/arch/x86/include/asm/restorer.h +++ b/criu/arch/x86/include/asm/restorer.h @@ -25,6 +25,21 @@ static inline int set_compat_robust_list(uint32_t head_ptr, uint32_t len) } #endif /* !CONFIG_COMPAT */ +/* + * Documentation copied from glibc sysdeps/unix/sysv/linux/x86_64/clone.S + * The kernel expects: + * rax: system call number + * rdi: flags + * rsi: child_stack + * rdx: TID field in parent + * r10: TID field in child + * r8: thread pointer + * + * int clone(unsigned long clone_flags, unsigned long newsp, + * int *parent_tidptr, int *child_tidptr, + * unsigned long tls); + */ + #define RUN_CLONE_RESTORE_FN(ret, clone_flags, new_sp, parent_tid, \ thread_args, clone_restore_fn) \ asm volatile( \ @@ -63,6 +78,83 @@ static inline int set_compat_robust_list(uint32_t head_ptr, uint32_t len) "g"(&thread_args[i]) \ : "rax", "rcx", "rdi", "rsi", "rdx", "r10", "r11", "memory") +/* int clone3(struct clone_args *args, size_t size) */ +#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ + clone_restore_fn) \ + asm volatile( \ + "clone3_emul: \n" \ + /* + * Prepare stack pointer for child process. The kernel does + * stack + stack_size before passing the stack pointer to the + * child process. As we have to put the function and the + * arguments for the new process on that stack we have handle + * the kernel's implicit stack + stack_size. 
+ */ \ + "movq (%3), %%rsi /* new stack pointer */ \n" \ + /* Move the stack_size to %rax to use later as the offset */ \ + "movq %4, %%rax \n" \ + /* 16 bytes are needed on the stack for function and args */ \ + "subq $16, (%%rsi, %%rax) \n" \ + "movq %6, %%rdi /* thread args */ \n" \ + "movq %%rdi, 8(%%rsi, %%rax) \n" \ + "movq %5, %%rdi /* thread function */ \n" \ + "movq %%rdi, 0(%%rsi, %%rax) \n" \ + /* + * The stack address has been modified for the two + * elements above (child function, child arguments). + * This modified stack needs to be stored back into the + * clone_args structure. + */ \ + "movq (%%rsi), %3 \n" \ + /* + * Do the actual clone3() syscall. First argument (%rdi) is + * the clone_args structure, second argument is the size + * of clone_args. + */ \ + "movq %1, %%rdi /* clone_args */ \n" \ + "movq %2, %%rsi /* size */ \n" \ + "movl $"__stringify(__NR_clone3)", %%eax \n" \ + "syscall \n" \ + /* + * If clone3() was successful and if we are in the child + * '0' is returned. Jump to the child function handler. + */ \ + "testq %%rax,%%rax \n" \ + "jz thread3_run \n" \ + /* Return the PID to the parent process. */ \ + "movq %%rax, %0 \n" \ + "jmp clone3_end \n" \ + \ + "thread3_run: /* Child process */ \n" \ + /* Clear the frame pointer */ \ + "xorq %%rbp, %%rbp \n" \ + /* Pop the child function from the stack */ \ + "popq %%rax \n" \ + /* Pop the child function arguments from the stack */ \ + "popq %%rdi \n" \ + /* Run the child function */ \ + "callq *%%rax \n" \ + /* + * If the child function is expected to return, this + * would be the place to handle the return code. In CRIU's + * case the child function is expected to not return + * and do exit() itself. + */ \ + \ + "clone3_end: \n" \ + : "=r"(ret) \ + /* + * This uses the "r" modifier for all parameters + * as clang complained if using "g". 
+ */ \ + : "r"(&clone_args), \ + "r"(size), \ + "r"(&clone_args.stack), \ + "r"(clone_args.stack_size), \ + "r"(clone_restore_fn), \ + "r"(args) \ + : "rax", "rcx", "rdi", "rsi", "rdx", "r10", "r11", "memory") + #define ARCH_FAIL_CORE_RESTORE \ asm volatile( \ "movq %0, %%rsp \n" \ From 2082c03066a8eb2de1e2fc1cf828af1baa24e59e Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Mon, 16 Dec 2019 10:42:13 +0000 Subject: [PATCH 191/277] Use clone3() with set_tid to create processes With clone3() with set_tid, introduced in Linux kernel 5.4, it is no longer necessary to write to /proc/../ns_last_pid to influence the next PID number. clone3() can directly select a PID for the newly created process/thread. After checking for the availability of clone3() with set_tid and adding the assembler wrapper for clone3() in previous patches, this extends criu/pie/restorer.c and criu/clone-noasan.c to use the newly added assembler clone3() wrapper to create processes with a certain PID. This is an RFC and WIP, but I wanted to share it and run it through CI for feedback. As the CI will probably not use a 5.4 based kernel it should just keep on working as before.
Signed-off-by: Adrian Reber --- criu/clone-noasan.c | 32 +++++++++++++++++++ criu/cr-restore.c | 64 +++++++++++++++++++++++-------------- criu/include/clone-noasan.h | 2 ++ criu/include/restorer.h | 1 + criu/include/rst_info.h | 1 + criu/pie/restorer.c | 64 ++++++++++++++++++++++++------------- 6 files changed, 117 insertions(+), 47 deletions(-) diff --git a/criu/clone-noasan.c b/criu/clone-noasan.c index 5f1858d4d0..bcbc3e4bdd 100644 --- a/criu/clone-noasan.c +++ b/criu/clone-noasan.c @@ -1,4 +1,10 @@ +#include #include +#include + +#include + +#include "sched.h" #include "common/compiler.h" #include "log.h" #include "common/bug.h" @@ -31,6 +37,7 @@ int clone_noasan(int (*fn)(void *), int flags, void *arg) { void *stack_ptr = (void *)round_down((unsigned long)&stack_ptr - 1024, 16); + BUG_ON((flags & CLONE_VM) && !(flags & CLONE_VFORK)); /* * Reserve some bytes for clone() internal needs @@ -38,3 +45,28 @@ int clone_noasan(int (*fn)(void *), int flags, void *arg) */ return clone(fn, stack_ptr, flags, arg); } + +int clone3_with_pid_noasan(int (*fn)(void *), void *arg, int flags, + int exit_signal, pid_t pid) +{ + struct _clone_args c_args = {}; + + BUG_ON(flags & CLONE_VM); + + /* + * Make sure no child signals are requested. clone3() uses + * exit_signal for that. 
+ */ + BUG_ON(flags & 0xff); + + pr_debug("Creating process using clone3()\n"); + + c_args.exit_signal = exit_signal; + c_args.flags = flags; + c_args.set_tid = ptr_to_u64(&pid); + c_args.set_tid_size = 1; + pid = syscall(__NR_clone3, &c_args, sizeof(c_args)); + if (pid == 0) + exit(fn(arg)); + return pid; +} diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 516477dcdd..9cdb9b3ed2 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -1375,40 +1375,55 @@ static inline int fork_with_pid(struct pstree_item *item) if (!(ca.clone_flags & CLONE_NEWPID)) { char buf[32]; int len; - int fd; + int fd = -1; - fd = open_proc_rw(PROC_GEN, LAST_PID_PATH); - if (fd < 0) - goto err; + if (!kdat.has_clone3_set_tid) { + fd = open_proc_rw(PROC_GEN, LAST_PID_PATH); + if (fd < 0) + goto err; + } lock_last_pid(); - len = snprintf(buf, sizeof(buf), "%d", pid - 1); - if (write(fd, buf, len) != len) { - pr_perror("%d: Write %s to %s", pid, buf, LAST_PID_PATH); + if (!kdat.has_clone3_set_tid) { + len = snprintf(buf, sizeof(buf), "%d", pid - 1); + if (write(fd, buf, len) != len) { + pr_perror("%d: Write %s to %s", pid, buf, + LAST_PID_PATH); + close(fd); + goto err_unlock; + } close(fd); - goto err_unlock; } - close(fd); } else { BUG_ON(pid != INIT_PID); } - /* - * Some kernel modules, such as network packet generator - * run kernel thread upon net-namespace creattion taking - * the @pid we've been requeting via LAST_PID_PATH interface - * so that we can't restore a take with pid needed. - * - * Here is an idea -- unhare net namespace in callee instead. - */ - /* - * The cgroup namespace is also unshared explicitly in the - * move_in_cgroup(), so drop this flag here as well. 
- */ - close_pid_proc(); - ret = clone_noasan(restore_task_with_children, - (ca.clone_flags & ~(CLONE_NEWNET | CLONE_NEWCGROUP)) | SIGCHLD, &ca); + if (kdat.has_clone3_set_tid) { + ret = clone3_with_pid_noasan(restore_task_with_children, + &ca, (ca.clone_flags & + ~(CLONE_NEWNET | CLONE_NEWCGROUP)), + SIGCHLD, pid); + } else { + /* + * Some kernel modules, such as network packet generator + * run kernel thread upon net-namespace creation taking + * the @pid we've been requesting via LAST_PID_PATH interface + * so that we can't restore a take with pid needed. + * + * Here is an idea -- unshare net namespace in callee instead. + */ + /* + * The cgroup namespace is also unshared explicitly in the + * move_in_cgroup(), so drop this flag here as well. + */ + close_pid_proc(); + ret = clone_noasan(restore_task_with_children, + (ca.clone_flags & + ~(CLONE_NEWNET | CLONE_NEWCGROUP)) | SIGCHLD, + &ca); + } + if (ret < 0) { pr_perror("Can't fork for %d", pid); goto err_unlock; @@ -3594,6 +3609,7 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns task_args->vdso_maps_rt = vdso_maps_rt; task_args->vdso_rt_size = vdso_rt_size; task_args->can_map_vdso = kdat.can_map_vdso; + task_args->has_clone3_set_tid = kdat.has_clone3_set_tid; new_sp = restorer_stack(task_args->t->mz); diff --git a/criu/include/clone-noasan.h b/criu/include/clone-noasan.h index 8ef75fa736..0cfdaa1d9a 100644 --- a/criu/include/clone-noasan.h +++ b/criu/include/clone-noasan.h @@ -2,5 +2,7 @@ #define __CR_CLONE_NOASAN_H__ int clone_noasan(int (*fn)(void *), int flags, void *arg); +int clone3_with_pid_noasan(int (*fn)(void *), void *arg, int flags, + int exit_signal, pid_t pid); #endif /* __CR_CLONE_NOASAN_H__ */ diff --git a/criu/include/restorer.h b/criu/include/restorer.h index b93807f5fb..dfb4e6b712 100644 --- a/criu/include/restorer.h +++ b/criu/include/restorer.h @@ -221,6 +221,7 @@ struct task_restore_args { #endif int lsm_type; int child_subreaper; + bool 
has_clone3_set_tid; } __aligned(64); /* diff --git a/criu/include/rst_info.h b/criu/include/rst_info.h index 07c634f4ad..3283849e44 100644 --- a/criu/include/rst_info.h +++ b/criu/include/rst_info.h @@ -4,6 +4,7 @@ #include "common/lock.h" #include "common/list.h" #include "vma.h" +#include "kerndat.h" struct task_entries { int nr_threads, nr_tasks, nr_helpers; diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c index 888eb8e650..7012b88a15 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -35,6 +35,7 @@ #include "sk-inet.h" #include "vma.h" #include "uffd.h" +#include "sched.h" #include "common/lock.h" #include "common/page.h" @@ -1771,16 +1772,19 @@ long __export_restore_task(struct task_restore_args *args) long clone_flags = CLONE_VM | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM | CLONE_FS; long last_pid_len; + pid_t thread_pid; long parent_tid; int i, fd = -1; - /* One level pid ns hierarhy */ - fd = sys_openat(args->proc_fd, LAST_PID_PATH, O_RDWR, 0); - if (fd < 0) { - pr_err("can't open last pid fd %d\n", fd); - goto core_restore_end; - } + if (!args->has_clone3_set_tid) { + /* One level pid ns hierarhy */ + fd = sys_openat(args->proc_fd, LAST_PID_PATH, O_RDWR, 0); + if (fd < 0) { + pr_err("can't open last pid fd %d\n", fd); + goto core_restore_end; + } + } mutex_lock(&task_entries_local->last_pid_mutex); for (i = 0; i < args->nr_threads; i++) { @@ -1791,24 +1795,38 @@ long __export_restore_task(struct task_restore_args *args) continue; new_sp = restorer_stack(thread_args[i].mz); - last_pid_len = std_vprint_num(last_pid_buf, sizeof(last_pid_buf), thread_args[i].pid - 1, &s); - sys_lseek(fd, 0, SEEK_SET); - ret = sys_write(fd, s, last_pid_len); - if (ret < 0) { - pr_err("Can't set last_pid %ld/%s\n", ret, last_pid_buf); - sys_close(fd); - mutex_unlock(&task_entries_local->last_pid_mutex); - goto core_restore_end; - } - - /* - * To achieve functionality like libc's clone() - * we need a pure assembly here, because clone()'ed - * 
thread will run with own stack and we must not - * have any additional instructions... oh, dear... - */ + if (args->has_clone3_set_tid) { + struct _clone_args c_args = {}; + thread_pid = thread_args[i].pid; + c_args.set_tid = ptr_to_u64(&thread_pid); + c_args.flags = clone_flags; + c_args.set_tid_size = 1; + /* The kernel does stack + stack_size. */ + c_args.stack = new_sp - RESTORE_STACK_SIZE; + c_args.stack_size = RESTORE_STACK_SIZE; + c_args.child_tid = ptr_to_u64(&thread_args[i].pid); + c_args.parent_tid = ptr_to_u64(&parent_tid); + pr_debug("Using clone3 to restore the process\n"); + RUN_CLONE3_RESTORE_FN(ret, c_args, sizeof(c_args), &thread_args[i], args->clone_restore_fn); + } else { + last_pid_len = std_vprint_num(last_pid_buf, sizeof(last_pid_buf), thread_args[i].pid - 1, &s); + sys_lseek(fd, 0, SEEK_SET); + ret = sys_write(fd, s, last_pid_len); + if (ret < 0) { + pr_err("Can't set last_pid %ld/%s\n", ret, last_pid_buf); + sys_close(fd); + mutex_unlock(&task_entries_local->last_pid_mutex); + goto core_restore_end; + } - RUN_CLONE_RESTORE_FN(ret, clone_flags, new_sp, parent_tid, thread_args, args->clone_restore_fn); + /* + * To achieve functionality like libc's clone() + * we need a pure assembly here, because clone()'ed + * thread will run with own stack and we must not + * have any additional instructions... oh, dear... 
+ */ + RUN_CLONE_RESTORE_FN(ret, clone_flags, new_sp, parent_tid, thread_args, args->clone_restore_fn); + } if (ret != thread_args[i].pid) { pr_err("Unable to create a thread: %ld\n", ret); mutex_unlock(&task_entries_local->last_pid_mutex); From c53ed926bc0cfde766aa6724c073df174151d9a6 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 15 Jan 2020 14:27:09 +0100 Subject: [PATCH 192/277] s390x: remove stack pointer from clobber list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just like on all other supported architectures gcc complains about the stack pointer register being part of the clobber list: error: listing the stack pointer register ‘15’ in a clobber list is deprecated [-Werror=deprecated] This removes the stack pointer from the clobber list. 'zdtm.py run -a' still runs without any errors after this change. Signed-off-by: Adrian Reber --- compel/arch/s390/src/lib/include/uapi/asm/sigframe.h | 2 +- criu/arch/s390/include/asm/restore.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/compel/arch/s390/src/lib/include/uapi/asm/sigframe.h b/compel/arch/s390/src/lib/include/uapi/asm/sigframe.h index b6b8944733..c599ef3ab2 100644 --- a/compel/arch/s390/src/lib/include/uapi/asm/sigframe.h +++ b/compel/arch/s390/src/lib/include/uapi/asm/sigframe.h @@ -66,7 +66,7 @@ struct rt_sigframe { "svc 0\n" \ : \ : "d" (new_sp) \ - : "15", "memory") + : "memory") #define RT_SIGFRAME_UC(rt_sigframe) (&rt_sigframe->uc) #define RT_SIGFRAME_REGIP(rt_sigframe) (rt_sigframe)->uc.uc_mcontext.regs.psw.addr diff --git a/criu/arch/s390/include/asm/restore.h b/criu/arch/s390/include/asm/restore.h index 6463d8e628..b77e36c771 100644 --- a/criu/arch/s390/include/asm/restore.h +++ b/criu/arch/s390/include/asm/restore.h @@ -18,7 +18,7 @@ : "d" (new_sp), \ "d"((unsigned long)restore_task_exec_start), \ "d" (task_args) \ - : "2", "14", "15", "memory") + : "2", "14", "memory") /* There is nothing to do since TLS is accessed 
through %a01 */ #define core_get_tls(pcore, ptls) From 0f0564a13a0f5b02809c9d5c782d3c2b5c1eef7c Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Thu, 16 Jan 2020 16:41:40 +0100 Subject: [PATCH 193/277] s390x: use clone3() if possible This adds the parasite clone3() with set_tid wrapper for s390x. In contrast to the x86_64 implementation the thread start address and arguments are not put on the thread stack but passed via r4 and r5. As those registers are caller-saved they still contain the correct value (thread start address and arguments) after returning from the syscall. Tested on 5.5.0-rc6. Signed-off-by: Adrian Reber --- criu/arch/s390/include/asm/restorer.h | 40 +++++++++++++++++++++++---- criu/kerndat.c | 4 +-- 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/criu/arch/s390/include/asm/restorer.h b/criu/arch/s390/include/asm/restorer.h index 733f2de339..2fc2665354 100644 --- a/criu/arch/s390/include/asm/restorer.h +++ b/criu/arch/s390/include/asm/restorer.h @@ -40,11 +40,41 @@ : "0", "1", "2", "3", "4", "5", "6", "cc", "memory") #define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ - clone_restore_fn) do { \ - pr_err("This architecture does not support clone3() with set_tid, yet!\n"); \ - pr_err("Not creating a process with PID: %d\n", ((pid_t *)u64_to_ptr(clone_args.set_tid))[0]); \ - ret = -1; \ -} while (0) + clone_restore_fn) \ + asm volatile( \ + /* + * clone3 only needs two arguments (r2, r3), this means + * we can use r4 and r5 for args and thread function. + * r4 and r5 are callee-saved and are not overwritten. + * No need to put these values on the child stack. + */ \ + "lgr %%r4,%4\n" /* Save args in %r4 */ \ + "lgr %%r5,%3\n" /* Save clone_restore_fn in %r5 */ \ + "lgr %%r2,%1\n" /* Parameter 1: clone_args */ \ + "lgr %%r3,%2\n" /* Parameter 2: size */ \ + /* + * On s390x a syscall is done sc . + * That only works for syscalls < 255. 
clone3 is 435, + * therefore it is necessary to load the syscall number + * into r1 and do 'svc 0'. + */ \ + "lghi %%r1,"__stringify(__NR_clone3)"\n" \ + "svc 0\n" \ + "ltgr %0,%%r2\n" /* Set and check "ret" */ \ + "jnz 0f\n" /* ret != 0: Continue caller */ \ + "lgr %%r2,%%r4\n" /* Thread arguments taken from r4. */ \ + "lgr %%r1,%%r5\n" /* Thread function taken from r5. */ \ + "aghi %%r15,-160\n" /* Prepare stack frame */ \ + "xc 0(8,%%r15),0(%%r15)\n" \ + "basr %%r14,%%r1\n" /* Jump to clone_restore_fn() */ \ + "j .+2\n" /* BUG(): Force PGM check */ \ +"0:\n" /* Continue caller */ \ + : "=d"(ret) \ + : "a"(&clone_args), \ + "d"(size), \ + "d"(clone_restore_fn), \ + "d"(args) \ + : "0", "1", "2", "3", "4", "5", "cc", "memory") #define arch_map_vdso(map, compat) -1 diff --git a/criu/kerndat.c b/criu/kerndat.c index 0772828bc1..2261cca60a 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -992,10 +992,10 @@ static bool kerndat_has_clone3_set_tid(void) pid_t pid; struct _clone_args args = {}; -#ifndef CONFIG_X86_64 +#if !defined(CONFIG_X86_64) && !defined(CONFIG_S390) /* * Currently the CRIU PIE assembler clone3() wrapper is - * only implemented for X86_64. + * only implemented for X86_64 and S390X. */ kdat.has_clone3_set_tid = false; return 0; From b2d9412b92c0904aa038def5461f28be51c7e5a1 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 21 Jan 2020 14:20:05 +0100 Subject: [PATCH 194/277] arm: remove stack pointer from clobber list Just like on all other supported architectures gcc complains about the stack pointer register being part of the clobber list. This removes the stack pointer from the clobber list. 
Signed-off-by: Adrian Reber --- criu/arch/arm/include/asm/restore.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/arch/arm/include/asm/restore.h b/criu/arch/arm/include/asm/restore.h index 4c64d58ef8..c3b64c5b7e 100644 --- a/criu/arch/arm/include/asm/restore.h +++ b/criu/arch/arm/include/asm/restore.h @@ -16,7 +16,7 @@ : "r"(new_sp), \ "r"(restore_task_exec_start), \ "r"(task_args) \ - : "sp", "r0", "r1", "memory") + : "r0", "r1", "memory") static inline void core_get_tls(CoreEntry *pcore, tls_t *ptls) { From 0d9209d72e52bc91060d1f85375abef7ebb48a3e Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Fri, 17 Jan 2020 13:35:48 +0100 Subject: [PATCH 195/277] ppc64le: use clone3() if possible This adds the parasite clone3() with set_tid wrapper for ppc64le. Signed-off-by: Adrian Reber --- criu/arch/ppc64/include/asm/restorer.h | 46 ++++++++++++++++++++++---- criu/kerndat.c | 4 +-- 2 files changed, 42 insertions(+), 8 deletions(-) diff --git a/criu/arch/ppc64/include/asm/restorer.h b/criu/arch/ppc64/include/asm/restorer.h index 19bc3ea36e..c447eefeaa 100644 --- a/criu/arch/ppc64/include/asm/restorer.h +++ b/criu/arch/ppc64/include/asm/restorer.h @@ -48,12 +48,46 @@ "r"(&thread_args[i]) /* %6 */ \ : "memory","0","3","4","5","6","7","14","15") -#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ - clone_restore_fn) do { \ - pr_err("This architecture does not support clone3() with set_tid, yet!\n"); \ - pr_err("Not creating a process with PID: %d\n", ((pid_t *)u64_to_ptr(clone_args.set_tid))[0]); \ - ret = -1; \ -} while (0) +#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ + clone_restore_fn) \ +/* + * The clone3() function accepts following parameters: + * int clone3(struct clone_args *args, size_t size) + * + * Always consult the CLONE3 wrappers for other architectures + * for additional details. + * + * For PPC64LE the first parameter (clone_args) is passed in r3 and + * the second parameter (size) is passed in r4. 
+ * + * This clone3() wrapper is based on the clone() wrapper from above. + */ \ + asm volatile( \ + "clone3_emul: \n" \ + "/* Save fn, args across syscall. */ \n" \ + "mr 14, %3 /* clone_restore_fn in r14 */ \n" \ + "mr 15, %4 /* &thread_args[i] in r15 */ \n" \ + "mr 3, %1 /* clone_args */ \n" \ + "mr 4, %2 /* size */ \n" \ + "li 0,"__stringify(__NR_clone3)" \n" \ + "sc \n" \ + "/* Check for child process. */ \n" \ + "cmpdi cr1,3,0 \n" \ + "crandc cr1*4+eq,cr1*4+eq,cr0*4+so \n" \ + "bne- cr1,clone3_end \n" \ + "/* child */ \n" \ + "addi 14, 14, 8 /* jump over r2 fixup */ \n" \ + "mtctr 14 \n" \ + "mr 3,15 \n" \ + "bctr \n" \ + "clone3_end: \n" \ + "mr %0,3 \n" \ + : "=r"(ret) /* %0 */ \ + : "r"(&clone_args), /* %1 */ \ + "r"(size), /* %2 */ \ + "r"(clone_restore_fn), /* %3 */ \ + "r"(args) /* %4 */ \ + : "memory","0","3","4","5","14","15") #define arch_map_vdso(map, compat) -1 diff --git a/criu/kerndat.c b/criu/kerndat.c index 2261cca60a..c1fc9259bf 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -992,10 +992,10 @@ static bool kerndat_has_clone3_set_tid(void) pid_t pid; struct _clone_args args = {}; -#if !defined(CONFIG_X86_64) && !defined(CONFIG_S390) +#if !defined(CONFIG_X86_64) && !defined(CONFIG_S390) && !defined(CONFIG_PPC64) /* * Currently the CRIU PIE assembler clone3() wrapper is - * only implemented for X86_64 and S390X. + * only implemented for X86_64, S390X and PPC64LE. */ kdat.has_clone3_set_tid = false; return 0; From a36958151cbdb065ba9c4fcf1930c0f01f846a5f Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Sat, 25 Jan 2020 13:25:21 +0100 Subject: [PATCH 196/277] clone3: handle clone3() with CLONE_PARENT clone3() explicitly blocks setting an exit_signal if CLONE_PARENT is specified. With clone() it also did not work, but there was no error message. The exit signal from the thread group leader is taken. 
Signed-off-by: Adrian Reber --- criu/clone-noasan.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/criu/clone-noasan.c b/criu/clone-noasan.c index bcbc3e4bdd..a2190ba0af 100644 --- a/criu/clone-noasan.c +++ b/criu/clone-noasan.c @@ -61,7 +61,19 @@ int clone3_with_pid_noasan(int (*fn)(void *), void *arg, int flags, pr_debug("Creating process using clone3()\n"); - c_args.exit_signal = exit_signal; + /* + * clone3() explicitly blocks setting an exit_signal + * if CLONE_PARENT is specified. With clone() it also + * did not work, but there was no error message. The + * exit signal from the thread group leader is taken. + */ + if (!(flags & CLONE_PARENT)) { + if (exit_signal != SIGCHLD) { + pr_err("Exit signal not SIGCHLD\n"); + return -1; + } + c_args.exit_signal = exit_signal; + } c_args.flags = flags; c_args.set_tid = ptr_to_u64(&pid); c_args.set_tid_size = 1; From 784919b2586aecbe8966317bdd2b74a350bfccf0 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Sun, 19 Jan 2020 21:42:58 +0100 Subject: [PATCH 197/277] aarch64: use clone3() if possible This adds the parasite clone3() with set_tid wrapper for aarch64. Tested on Fedora 31 with 5.5.0-rc6. 
Signed-off-by: Adrian Reber --- criu/arch/aarch64/include/asm/restorer.h | 67 +++++++++++++++++++++--- criu/kerndat.c | 4 +- 2 files changed, 63 insertions(+), 8 deletions(-) diff --git a/criu/arch/aarch64/include/asm/restorer.h b/criu/arch/aarch64/include/asm/restorer.h index 2fe58915be..120fa8fb20 100644 --- a/criu/arch/aarch64/include/asm/restorer.h +++ b/criu/arch/aarch64/include/asm/restorer.h @@ -42,12 +42,67 @@ "r"(&thread_args[i]) \ : "x0", "x1", "x2", "x3", "x8", "memory") -#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ - clone_restore_fn) do { \ - pr_err("This architecture does not support clone3() with set_tid, yet!\n"); \ - pr_err("Not creating a process with PID: %d\n", ((pid_t *)u64_to_ptr(clone_args.set_tid))[0]); \ - ret = -1; \ -} while (0) +/* + * Based on sysdeps/unix/sysv/linux/aarch64/clone.S + * + * int clone(int (*fn)(void *arg), x0 + * void *child_stack, x1 + * int flags, x2 + * void *arg, x3 + * pid_t *ptid, x4 + * struct user_desc *tls, x5 + * pid_t *ctid); x6 + * + * int clone3(struct clone_args *args, x0 + * size_t size); x1 + * + * Always consult the CLONE3 wrappers for other architectures + * for additional details. + * + */ + +#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ + clone_restore_fn) \ + asm volatile( \ + /* In contrast to the clone() wrapper above this does not put + * the thread function and its arguments on the child stack, + * but uses registers to pass these parameters to the child process. + * Based on the glibc clone() wrapper at + * sysdeps/unix/sysv/linux/aarch64/clone.S. + */ \ + "clone3_emul: \n" \ + /* + * Based on the glibc clone() wrapper, which uses x10 and x11 + * to save the arguments for the child process, this does the same. + * x10 for the thread function and x11 for the thread arguments. 
+ */ \ + "mov x10, %3 /* clone_restore_fn */ \n" \ + "mov x11, %4 /* args */ \n" \ + "mov x0, %1 /* &clone_args */ \n" \ + "mov x1, %2 /* size */ \n" \ + /* Load syscall number */ \ + "mov x8, #"__stringify(__NR_clone3)" \n" \ + /* Do the syscall */ \ + "svc #0 \n" \ + \ + "cbz x0, clone3_thread_run \n" \ + \ + "mov %0, x0 \n" \ + "b clone3_end \n" \ + \ + "clone3_thread_run: \n" \ + /* Move args to x0 */ \ + "mov x0, x11 \n" \ + /* Jump to clone_restore_fn */ \ + "br x10 \n" \ + \ + "clone3_end: \n" \ + : "=r"(ret) \ + : "r"(&clone_args), \ + "r"(size), \ + "r"(clone_restore_fn), \ + "r"(args) \ + : "x0", "x1", "x8", "x10", "x11", "memory") #define ARCH_FAIL_CORE_RESTORE \ asm volatile( \ diff --git a/criu/kerndat.c b/criu/kerndat.c index c1fc9259bf..4070e01d28 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -992,10 +992,10 @@ static bool kerndat_has_clone3_set_tid(void) pid_t pid; struct _clone_args args = {}; -#if !defined(CONFIG_X86_64) && !defined(CONFIG_S390) && !defined(CONFIG_PPC64) +#if !defined(CONFIG_X86_64) && !defined(CONFIG_S390) && !defined(CONFIG_PPC64) && !defined(CONFIG_AARCH64) /* * Currently the CRIU PIE assembler clone3() wrapper is - * only implemented for X86_64, S390X and PPC64LE. + * only implemented for X86_64, S390X, AARCH64 and PPC64LE. */ kdat.has_clone3_set_tid = false; return 0; From 8d108a966f9ab4fb09e735d988f8605408d54789 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 22 Jan 2020 19:41:41 +0100 Subject: [PATCH 198/277] arm: use clone3() if it exists This is the last architecture specific change to make CRIU use clone3() with set_tid if available. Just as on all other architectures this adds a clone3() based assembler wrapper to be used in the restorer code. Tested on Fedora 31 with the same 5.5.0-rc6 kernel as on the other architectures. 
Signed-off-by: Adrian Reber --- criu/arch/arm/include/asm/restorer.h | 62 +++++++++++++++++++++++++--- criu/kerndat.c | 9 ---- 2 files changed, 56 insertions(+), 15 deletions(-) diff --git a/criu/arch/arm/include/asm/restorer.h b/criu/arch/arm/include/asm/restorer.h index ad4b58f93d..13ed15b263 100644 --- a/criu/arch/arm/include/asm/restorer.h +++ b/criu/arch/arm/include/asm/restorer.h @@ -43,12 +43,62 @@ "r"(&thread_args[i]) \ : "r0", "r1", "r2", "r3", "r7", "memory") -#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ - clone_restore_fn) do { \ - pr_err("This architecture does not support clone3() with set_tid, yet!\n"); \ - pr_err("Not creating a process with PID: %d\n", ((pid_t *)u64_to_ptr(clone_args.set_tid))[0]); \ - ret = -1; \ -} while (0) + +/* + * The clone3() assembler wrapper is based on the clone() wrapper above + * and on code from the glibc wrapper at + * sysdeps/unix/sysv/linux/arm/clone.S + * + * For arm it is necessary to change the child stack as on x86_64 as + * it seems there are not registers which stay the same over a syscall + * like on s390x, ppc64le and aarch64. + * + * Changing the child stack means that this code has to deal with the + * kernel doing stack + stack_size implicitly. 
+ * + * int clone3(struct clone_args *args, size_t size) + */ + +#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ + clone_restore_fn) \ + asm volatile( \ + "clone3_emul: \n" \ + /* Load thread stack pointer */ \ + "ldr r1, [%3] \n" \ + /* Load thread stack size */ \ + "mov r2, %4 \n" \ + /* Goto to the end of stack */ \ + "add r1, r1, r2 \n" \ + /* Load thread function and arguments and push on stack */ \ + "mov r2, %6 /* args */ \n" \ + "str r2, [r1, #4] /* args */ \n" \ + "mov r2, %5 /* function */ \n" \ + "str r2, [r1] /* function */ \n" \ + "mov r0, %1 /* clone_args */ \n" \ + "mov r1, %2 /* size */ \n" \ + "mov r7, #"__stringify(__NR_clone3)" \n" \ + "svc #0 \n" \ + \ + "cmp r0, #0 \n" \ + "beq thread3_run \n" \ + \ + "mov %0, r0 \n" \ + "b clone3_end \n" \ + \ + "thread3_run: \n" \ + "pop { r1 } \n" \ + "pop { r0 } \n" \ + "bx r1 \n" \ + \ + "clone3_end: \n" \ + : "=r"(ret) \ + : "r"(&clone_args), \ + "r"(size), \ + "r"(&clone_args.stack), \ + "r"(clone_args.stack_size), \ + "r"(clone_restore_fn), \ + "r"(args) \ + : "r0", "r1", "r2", "r7", "memory") #define ARCH_FAIL_CORE_RESTORE \ asm volatile( \ diff --git a/criu/kerndat.c b/criu/kerndat.c index 4070e01d28..e0b5731d54 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -992,15 +992,6 @@ static bool kerndat_has_clone3_set_tid(void) pid_t pid; struct _clone_args args = {}; -#if !defined(CONFIG_X86_64) && !defined(CONFIG_S390) && !defined(CONFIG_PPC64) && !defined(CONFIG_AARCH64) - /* - * Currently the CRIU PIE assembler clone3() wrapper is - * only implemented for X86_64, S390X, AARCH64 and PPC64LE. - */ - kdat.has_clone3_set_tid = false; - return 0; -#endif - args.set_tid = -1; /* * On a system without clone3() this will return ENOSYS. 
From f075c1dffd41f0764538f1f382751ad5f66b56a9 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 4 Feb 2020 14:43:59 +0000 Subject: [PATCH 199/277] travis: fix warning and errors from validation This fixes the validation errors from Travis: Build config validation root: deprecated key sudo (The key `sudo` has no effect anymore.) root: missing os, using the default linux root: key matrix is an alias for jobs, using jobs Signed-off-by: Adrian Reber --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 25dd6a29b3..7c36af0064 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,5 @@ language: c -sudo: required +os: linux dist: bionic cache: ccache services: @@ -12,7 +12,7 @@ env: - TR_ARCH=x86_64 - TR_ARCH=x86_64 CLANG=1 - TR_ARCH=openj9-test -matrix: +jobs: include: - os: linux arch: ppc64le From bdcf709f0084ad449b1641ef4a529f7c58727ec5 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Tue, 14 Jan 2020 12:04:40 +0300 Subject: [PATCH 200/277] compel: add -ffreestanding to force gcc not to use builtin memcpy, memset This patch fixes the problem with SSE (xmm) registers corruption on amd64 architecture. The problem was that gcc generates a parasite blob that uses xmm registers, but we don't preserve these registers in CRIU when injecting the parasite. Also, gcc, even with the -nostdlib option, uses builtin memcpy, memset functions that are optimized for amd64 and involve SSE registers. It seems that the optimal solution is to use the -ffreestanding gcc option to compile the parasite. This option implies -fno-builtin; it is also designed for compiling OS kernels and other code that is meant to run in non-hosted environments, and could prevent similar bugs in the future. To check whether your amd64 CRIU build is affected by this problem, you can simply run objdump -dS criu/pie/parasite.o | grep xmm Output should be empty.
Reported-by: Diyu Zhou Signed-off-by: Alexander Mikhalitsyn Signed-off-by: Alexander Mikhalitsyn --- compel/src/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compel/src/main.c b/compel/src/main.c index 51bac099fe..8b2c8bc8d9 100644 --- a/compel/src/main.c +++ b/compel/src/main.c @@ -21,7 +21,7 @@ #define CFLAGS_DEFAULT_SET \ "-Wstrict-prototypes " \ - "-fno-stack-protector -nostdlib -fomit-frame-pointer " + "-fno-stack-protector -nostdlib -fomit-frame-pointer -ffreestanding " #define COMPEL_CFLAGS_PIE CFLAGS_DEFAULT_SET "-fpie" #define COMPEL_CFLAGS_NOPIC CFLAGS_DEFAULT_SET "-fno-pic" From 049cd5b9243086d6485816f55d164a42f1087b2e Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Thu, 30 Jan 2020 15:21:54 -0800 Subject: [PATCH 201/277] doc/Makefile: don't hide xmlto stderr In case asciidoc is installed and xmlto is not, make returns an error but there's no diagnostics shown, since "xmlto: command not found" goes to /dev/null. Remove the redirect. Signed-off-by: Kir Kolyshkin --- Documentation/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/Makefile b/Documentation/Makefile index cbc7ff2c81..5025e2b992 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -54,7 +54,7 @@ ifneq ($(USE_ASCIIDOCTOR),) $(Q) $(ASCIIDOC) -b manpage -d manpage -o $@ $< else $(Q) $(ASCIIDOC) -b docbook -d manpage -o $(patsubst %.1,%.xml,$@) $< - $(Q) $(XMLTO) man -m custom.xsl $(patsubst %.1,%.xml,$@) 2>/dev/null + $(Q) $(XMLTO) man -m custom.xsl $(patsubst %.1,%.xml,$@) endif %.8: %.txt $(FOOTER) custom.xsl @@ -63,7 +63,7 @@ ifneq ($(USE_ASCIIDOCTOR),) $(Q) $(ASCIIDOC) -b manpage -d manpage -o $@ $< else $(Q) $(ASCIIDOC) -b docbook -d manpage -o $(patsubst %.8,%.xml,$@) $< - $(Q) $(XMLTO) man -m custom.xsl $(patsubst %.8,%.xml,$@) 2>/dev/null + $(Q) $(XMLTO) man -m custom.xsl $(patsubst %.8,%.xml,$@) endif %.ps: %.1 From 9d219e8f1074f86f6ab11c32999e59145c0ec26c Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin 
Date: Thu, 30 Jan 2020 15:27:07 -0800 Subject: [PATCH 202/277] criu(8): some minor rewording 1. Add a/the articles where I see them missing 2. s/Forbid/disable/ 3. s/crit/crit(1)/ as we're referring to a man page 4. Simplify some descriptions Signed-off-by: Kir Kolyshkin --- Documentation/criu.txt | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/Documentation/criu.txt b/Documentation/criu.txt index 133a094c0f..64b33ce6d6 100644 --- a/Documentation/criu.txt +++ b/Documentation/criu.txt @@ -24,8 +24,8 @@ on a different system, or both. OPTIONS ------- -Most of the true / false long options (the ones without arguments) can be -prefixed with *--no-* to negate the option (example: *--display-stats* +Most of the long flags can be +prefixed with *no-* to negate the option (example: *--display-stats* and *--no-display-stats*). Common options @@ -33,9 +33,8 @@ Common options Common options are applicable to any 'command'. *-v*[*v*...], *--verbosity*:: - Increase verbosity up from the default level. Multiple *v* can be used, - each increasing verbosity by one level. Using long option without argument - increases verbosity by one level. + Increase verbosity up from the default level. In case of short option, + multiple *v* can be used, each increasing verbosity by one. *-v*'num', *--verbosity*='num':: Set verbosity level to 'num'. The higher the level, the more output @@ -57,22 +56,22 @@ The following levels are available: Pass a specific configuration file to criu. *--no-default-config*:: - Forbid parsing of default configuration files. + Disable parsing of default configuration files. *--pidfile* 'file':: Write root task, service or page-server pid into a 'file'. *-o*, *--log-file* 'file':: - Write logging messages to 'file'. + Write logging messages to a 'file'. 
*--display-stats*:: - During dump as well as during restore *criu* collects information - like the time required to dump or restore the process or the + During dump, as well as during restore, *criu* collects some statistics, + like the time required to dump or restore the process, or the number of pages dumped or restored. This information is always - written to the files 'stats-dump' and 'stats-restore' and can - be easily displayed using *crit*. The option *--display-stats* - additionally prints out this information on the console at the end - of a dump or a restore. + saved to the *stats-dump* and *stats-restore* files, and can + be shown using *crit*(1). The option *--display-stats* + prints out this information on the console at the end + of a dump or restore operation. *-D*, *--images-dir* 'path':: Use 'path' as a base directory where to look for sets of image files. From 0011cbb9cb1d12c779b5dd9c01b4caf1f4dea30c Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Thu, 30 Jan 2020 15:30:57 -0800 Subject: [PATCH 203/277] criu(8): fix for asciidoctor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 0493724c8eda3 added support for using asciidoctor (instead of asciidoc + xmlto) to generate man pages. For some reason, asciidoctor does not deal well with some complex formatting that we use for options such as --external, leading to literal ’ and ' appearing in the man page instead of italic formatting. For example: > --inherit-fd fd[’N']:’resource' (here both N and resource should be in italic). 
Asciidoctor documentation (asciidoctor --help syntax) tells: > == Text Formatting > > .Constrained (applied at word boundaries) > *strong importance* (aka bold) > _stress emphasis_ (aka italic) > `monospaced` (aka typewriter text) > "`double`" and '`single`' typographic quotes > +passthrough text+ (substitutions disabled) > `+literal text+` (monospaced with substitutions disabled) > > .Unconstrained (applied anywhere) > **C**reate+**R**ead+**U**pdate+**D**elete > fan__freakin__tastic > ``mono``culture so I had to carefully replace *bold* with **bold** and 'italic' with __italic__ to make it all work. Tested with both terminal and postscript output, with both asciidoctor and asciidoc+xmlto. TODO: figure out how to fix examples (literal multi-line text), since asciidoctor does not display it in monospaced font (this is only true for postscript/pdf output so low priority). Signed-off-by: Kir Kolyshkin --- Documentation/criu.txt | 51 +++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/Documentation/criu.txt b/Documentation/criu.txt index 64b33ce6d6..a6b9f7fae2 100644 --- a/Documentation/criu.txt +++ b/Documentation/criu.txt @@ -36,8 +36,8 @@ Common options are applicable to any 'command'. Increase verbosity up from the default level. In case of short option, multiple *v* can be used, each increasing verbosity by one. -*-v*'num', *--verbosity*='num':: - Set verbosity level to 'num'. The higher the level, the more output +**-v**__num__, **--verbosity=**__num__:: + Set verbosity level to _num_. The higher the level, the more output is produced. + The following levels are available: @@ -184,7 +184,7 @@ In other words, do not use it unless really needed. *-s*, *--leave-stopped*:: Leave tasks in stopped state after checkpoint, instead of killing. -*--external* 'type'*[*'id'*]:*'value':: +*--external* __type__**[**__id__**]:**__value__:: Dump an instance of an external resource. 
The generic syntax is 'type' of resource, followed by resource 'id' (enclosed in literal square brackets), and optional 'value' (prepended by a literal colon). @@ -193,35 +193,36 @@ In other words, do not use it unless really needed. Note to restore external resources, either *--external* or *--inherit-fd* is used, depending on resource type. -*--external mnt[*'mountpoint'*]:*'name':: +*--external* **mnt[**__mountpoint__**]:**__name__:: Dump an external bind mount referenced by 'mountpoint', saving it to image under the identifier 'name'. -*--external mnt[]:*'flags':: +*--external* **mnt[]:**__flags__:: Dump all external bind mounts, autodetecting those. Optional 'flags' can contain *m* to also dump external master mounts, *s* to also dump external shared mounts (default behavior is to abort dumping if such mounts are found). If 'flags' are not provided, colon is optional. -*--external dev[*'major'*/*'minor'*]:*'name':: +*--external* **dev[**__major__**/**__minor__**]:**__name__:: Allow to dump a mount namespace having a real block device mounted. A block device is identified by its 'major' and 'minor' numbers, and *criu* saves its information to image under the identifier 'name'. -*--external file[*'mnt_id'*:*'inode'*]*:: +*--external* **file[**__mnt_id__**:**__inode__**]**:: Dump an external file, i.e. an opened file that is can not be resolved from the current mount namespace, which can not be dumped without using this option. The file is identified by 'mnt_id' (a field obtained from - */proc/*'pid'*/fdinfo/*'N') and 'inode' (as returned by *stat*(2)). + **/proc/**__pid__**/fdinfo/**__N__) and 'inode' (as returned by + *stat*(2)). -*--external tty[*'rdev'*:*'dev'*]*:: +*--external* **tty[**__rdev__**:**__dev__**]**:: Dump an external TTY, identified by *st_rdev* and *st_dev* fields returned by *stat*(2). 
-*--external unix[*'id'*]*:: +*--external* **unix[**__id__**]**:: Tell *criu* that one end of a pair of UNIX sockets (created by - *socketpair*(2)) with 'id' is OK to be disconnected. + *socketpair*(2)) with the given _id_ is OK to be disconnected. *--freeze-cgroup*:: Use cgroup freezer to collect processes. @@ -379,7 +380,7 @@ By default the option is set to *fpu* and *ins*. ~~~~~~~~~ Restores previously checkpointed processes. -*--inherit-fd* *fd[*'N'*]:*'resource':: +*--inherit-fd* **fd[**__N__**]:**__resource__:: Inherit a file descriptor. This option lets *criu* use an already opened file descriptor 'N' for restoring a file identified by 'resource'. This option can be used to restore an external resource dumped @@ -387,10 +388,10 @@ Restores previously checkpointed processes. + The 'resource' argument can be one of the following: + - - *tty[*'rdev'*:*'dev'*]* - - *pipe[*'inode'*]* - - *socket[*'inode'*]* - - *file[*'mnt_id'*:*'inode'*]* + - **tty[**__rdev__**:**__dev__**]** + - **pipe[**__inode__**]** + - **socket[**__inode__*]* + - **file[**__mnt_id__**:**__inode__**]** - 'path/to/file' + @@ -416,7 +417,7 @@ usually need to be escaped from shell. This option is required to restore a mount namespace. The directory 'path' must be a mount point and its parent must not be overmounted. -*--external* 'type'*[*'id'*]:*'value':: +*--external* __type__**[**__id__**]:**__value__:: Restore an instance of an external resource. The generic syntax is 'type' of resource, followed by resource 'id' (enclosed in literal square brackets), and optional 'value' (prepended by a literal colon). @@ -426,7 +427,7 @@ usually need to be escaped from shell. the help of *--external* *file*, *tty*, and *unix* options), option *--inherit-fd* should be used. -*--external mnt[*'name'*]:*'mountpoint':: +*--external* **mnt[**__name__**]:**__mountpoint__:: Restore an external bind mount referenced in the image by 'name', bind-mounting it from the host 'mountpoint' to a proper mount point. 
@@ -434,17 +435,17 @@ usually need to be escaped from shell. Restore all external bind mounts (dumped with the help of *--external mnt[]* auto-detection). -*--external dev[*'name'*]:*'/dev/path':: +*--external* **dev[**__name__**]:**__/dev/path__:: Restore an external mount device, identified in the image by 'name', using the existing block device '/dev/path'. -*--external veth[*'inner_dev'*]:*'outer_dev'*@*'bridge':: +*--external* **veth[**__inner_dev__**]:**__outer_dev__**@**__bridge__:: Set the outer VETH device name (corresponding to 'inner_dev' being - restored) to 'outer_dev'. If optional *@*'bridge' is specified, + restored) to 'outer_dev'. If optional **@**_bridge_ is specified, 'outer_dev' is added to that bridge. If the option is not used, 'outer_dev' will be autogenerated by the kernel. -*--external macvlan[*'inner_dev'*]:*'outer_dev':: +*--external* **macvlan[**__inner_dev__**]:**__outer_dev__:: When restoring an image that have a MacVLAN device in it, this option must be used to specify to which 'outer_dev' (an existing network device in CRIU namespace) the restored 'inner_dev' should be bound to. @@ -489,14 +490,14 @@ The 'mode' may be one of the following: *--tcp-close*:: Restore connected TCP sockets in closed state. -*--veth-pair* 'IN'*=*'OUT':: +*--veth-pair* __IN__**=**__OUT__:: Correspondence between outside and inside names of veth devices. *-l*, *--file-locks*:: Restore file locks from the image. -*--lsm-profile* 'type'*:*'name':: - Specify an LSM profile to be used during restore. The `type` can be +*--lsm-profile* __type__**:**__name__:: + Specify an LSM profile to be used during restore. The _type_ can be either *apparmor* or *selinux*. 
*--auto-dedup*:: From af9157ff696c4c7b45d9d4a47f8d7e71fa8c71bc Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Sun, 2 Feb 2020 18:45:59 +0000 Subject: [PATCH 204/277] criu: fix build failure against gcc-10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On gcc-10 (and gcc-9 -fno-common) build fails as: ``` ld: criu/arch/x86/crtools.o:criu/include/cr_options.h:159: multiple definition of `rpc_cfg_file'; criu/arch/x86/cpu.o:criu/include/cr_options.h:159: first defined here make[2]: *** [scripts/nmk/scripts/build.mk:164: criu/arch/x86/crtools.built-in.o] Error 1 ``` gcc-10 will change the default from -fcommon to fno-common: https://gcc.gnu.org/PR85678. The error also happens if CFLAGS=-fno-common passed explicitly. Reported-by: Toralf Förster Bug: https://bugs.gentoo.org/707942 Signed-off-by: Sergei Trofimovich --- criu/config.c | 1 + criu/include/cr_options.h | 2 +- criu/include/pstree.h | 2 +- criu/include/tun.h | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/criu/config.c b/criu/config.c index 6fb6bfdff7..df5d851626 100644 --- a/criu/config.c +++ b/criu/config.c @@ -30,6 +30,7 @@ #include "common/xmalloc.h" struct cr_options opts; +char *rpc_cfg_file; static int count_elements(char **to_count) { diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h index a7b040fbf3..e02848d2d0 100644 --- a/criu/include/cr_options.h +++ b/criu/include/cr_options.h @@ -156,7 +156,7 @@ struct cr_options { }; extern struct cr_options opts; -char *rpc_cfg_file; +extern char *rpc_cfg_file; extern int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd, int state); extern int check_options(void); diff --git a/criu/include/pstree.h b/criu/include/pstree.h index 7303c1fedc..61ab0ce0eb 100644 --- a/criu/include/pstree.h +++ b/criu/include/pstree.h @@ -42,7 +42,7 @@ enum { }; #define FDS_EVENT (1 << FDS_EVENT_BIT) -struct pstree_item *current; +extern struct pstree_item *current; struct 
rst_info; /* See alloc_pstree_item() for details */ diff --git a/criu/include/tun.h b/criu/include/tun.h index ce0b266a64..b82c445a79 100644 --- a/criu/include/tun.h +++ b/criu/include/tun.h @@ -5,7 +5,7 @@ #define TUN_MINOR 200 #endif -struct ns_id *ns; +extern struct ns_id *ns; #include From c172688842c43070ae03376d205c7c0381cc1988 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 4 Feb 2020 23:12:22 -0800 Subject: [PATCH 205/277] test/vdso: check the code path when there is no API to map vDSO Signed-off-by: Andrei Vagin --- criu/crtools.c | 3 +++ criu/include/fault-injection.h | 1 + test/jenkins/criu-fault.sh | 5 +++++ 3 files changed, 9 insertions(+) diff --git a/criu/crtools.c b/criu/crtools.c index 980e26a946..064aa7399a 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -177,6 +177,9 @@ int main(int argc, char *argv[], char *envp[]) if (kerndat_init()) return 1; + if (fault_injected(FI_CANNOT_MAP_VDSO)) + kdat.can_map_vdso = 0; + if (opts.deprecated_ok) pr_debug("DEPRECATED ON\n"); diff --git a/criu/include/fault-injection.h b/criu/include/fault-injection.h index 852d271668..31fe161784 100644 --- a/criu/include/fault-injection.h +++ b/criu/include/fault-injection.h @@ -17,6 +17,7 @@ enum faults { FI_NO_BREAKPOINTS = 130, FI_PARTIAL_PAGES = 131, FI_HUGE_ANON_SHMEM_ID = 132, + FI_CANNOT_MAP_VDSO = 133, FI_MAX, }; diff --git a/test/jenkins/criu-fault.sh b/test/jenkins/criu-fault.sh index ec6d26f892..4e3790e59c 100755 --- a/test/jenkins/criu-fault.sh +++ b/test/jenkins/criu-fault.sh @@ -12,6 +12,10 @@ prep ./test/zdtm.py run -t zdtm/static/vdso01 --fault 127 || fail ./test/zdtm.py run -t zdtm/static/vdso-proxy --fault 127 --iters 3 || fail +if [ "${COMPAT_TEST}" != "y" ] ; then + ./test/zdtm.py run -t zdtm/static/vdso01 --fault 133 -f h || fail +fi + ./test/zdtm.py run -t zdtm/static/mntns_ghost --fault 2 --keep-going --report report || fail ./test/zdtm.py run -t zdtm/static/mntns_ghost --fault 4 --keep-going --report report || fail @@ -23,3 +27,4 @@ prep
./test/zdtm.py run -t zdtm/static/maps04 --fault 131 --keep-going --report report --pre 2:1 || fail ./test/zdtm.py run -t zdtm/transition/maps008 --fault 131 --keep-going --report report --pre 2:1 || fail ./test/zdtm.py run -t zdtm/static/maps01 --fault 132 -f h || fail + From f32a4e47dbf9588a85f0934f3c4c0e058028b159 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 4 Feb 2020 23:13:43 -0800 Subject: [PATCH 206/277] vdso: use correct offsets to remap vdso and vvar mappings In the current version, the offsets of remapping vvar and vdso regions are mixed up. If vdso is before vvar, vvar has to be mapped with the vdso_size offset. If vvar is before vdso, vdso has to be mapped with the vvar_size offset. Signed-off-by: Andrei Vagin --- criu/pie/parasite-vdso.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/criu/pie/parasite-vdso.c b/criu/pie/parasite-vdso.c index 38da766804..3a1684d353 100644 --- a/criu/pie/parasite-vdso.c +++ b/criu/pie/parasite-vdso.c @@ -119,9 +119,9 @@ int vdso_do_park(struct vdso_maps *rt, unsigned long addr, unsigned long space) BUG_ON((vdso_size + vvar_size) < space); if (rt->sym.vdso_before_vvar) - return park_at(rt, addr, addr + vvar_size); + return park_at(rt, addr, addr + vdso_size); else - return park_at(rt, addr + vdso_size, addr); + return park_at(rt, addr + vvar_size, addr); } #ifndef CONFIG_COMPAT From 83e08820e68c726944f2601ac5da249a71c9ca24 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Wed, 5 Feb 2020 22:33:02 +0000 Subject: [PATCH 207/277] typo: fix missing space in error message Signed-off-by: Nicolas Viennot --- criu/sk-unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/sk-unix.c b/criu/sk-unix.c index f43aa21244..048ff44ae9 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -1877,7 +1877,7 @@ static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd) !(opts.ext_unix_sk)) { pr_err("External socket found in image. 
" "Consider using the --" USK_EXT_PARAM - "option to allow restoring it.\n"); + " option to allow restoring it.\n"); return -1; } From c365f704683b16ba6820912a2514798393a52659 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Wed, 18 Dec 2019 23:32:32 +0000 Subject: [PATCH 208/277] memfd: add file support See "man memfd_create" for more information of what memfd is. This adds support for memfd open files, that are not memory mapped. * We add a new kind of file: MEMFD. * We add two image types MEMFD_FILE, and MEMFD_INODE. MEMFD_FILE contains usual file information (e.g., position). MEMFD_INODE contains the memfd name, and a shmid identifier referring to the content. * We reuse the shmem facilities for dumping memfd content as it would be easier to support incremental checkpoints in the future. Signed-off-by: Nicolas Viennot --- Makefile.config | 2 +- criu/Makefile.crtools | 1 + criu/cr-restore.c | 2 + criu/files.c | 18 +- criu/image-desc.c | 1 + criu/include/image-desc.h | 2 + criu/include/magic.h | 1 + criu/include/memfd.h | 24 +++ criu/include/protobuf-desc.h | 2 + criu/include/shmem.h | 3 + criu/kerndat.c | 3 +- criu/memfd.c | 350 +++++++++++++++++++++++++++++++++++ criu/shmem.c | 66 ++++++- images/Makefile | 1 + images/fdinfo.proto | 3 + images/memfd.proto | 20 ++ lib/py/images/images.py | 2 + scripts/feature-tests.mak | 11 ++ 18 files changed, 503 insertions(+), 9 deletions(-) create mode 100644 criu/include/memfd.h create mode 100644 criu/memfd.c create mode 100644 images/memfd.proto diff --git a/Makefile.config b/Makefile.config index 161365960e..98ba5d892b 100644 --- a/Makefile.config +++ b/Makefile.config @@ -64,7 +64,7 @@ export DEFINES += $(FEATURE_DEFINES) export CFLAGS += $(FEATURE_DEFINES) FEATURES_LIST := TCP_REPAIR STRLCPY STRLCAT PTRACE_PEEKSIGINFO \ - SETPROCTITLE_INIT MEMFD TCP_REPAIR_WINDOW FSCONFIG + SETPROCTITLE_INIT MEMFD TCP_REPAIR_WINDOW FSCONFIG MEMFD_CREATE # $1 - config name define gen-feature-test diff --git
a/criu/Makefile.crtools b/criu/Makefile.crtools index d19ff8123b..7ba7137bd6 100644 --- a/criu/Makefile.crtools +++ b/criu/Makefile.crtools @@ -40,6 +40,7 @@ obj-y += libnetlink.o obj-y += log.o obj-y += lsm.o obj-y += mem.o +obj-y += memfd.o obj-y += mount.o obj-y += filesystems.o obj-y += namespaces.o diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 9cdb9b3ed2..d72bf995f0 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -76,6 +76,7 @@ #include "sk-queue.h" #include "sigframe.h" #include "fdstore.h" +#include "memfd.h" #include "parasite-syscall.h" #include "files-reg.h" @@ -289,6 +290,7 @@ static struct collect_image_info *cinfos_files[] = { &fanotify_cinfo, &fanotify_mark_cinfo, &ext_file_cinfo, + &memfd_cinfo, }; /* These images are required to restore namespaces */ diff --git a/criu/files.c b/criu/files.c index e268978704..ea86deaa37 100644 --- a/criu/files.c +++ b/criu/files.c @@ -34,6 +34,7 @@ #include "sk-packet.h" #include "mount.h" #include "signalfd.h" +#include "memfd.h" #include "namespaces.h" #include "tun.h" #include "timerfd.h" @@ -546,13 +547,17 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts, return -1; p.link = &link; - if (link.name[1] == '/') - return do_dump_gen_file(&p, lfd, ®file_dump_ops, e); - if (check_ns_proc(&link)) - return do_dump_gen_file(&p, lfd, &nsfile_dump_ops, e); + if (is_memfd(p.stat.st_dev, &link.name[1])) + ops = &memfd_dump_ops; + else if (link.name[1] == '/') + ops = ®file_dump_ops; + else if (check_ns_proc(&link)) + ops = &nsfile_dump_ops; + else + return dump_unsupp_fd(&p, lfd, "reg", link.name + 1, e); - return dump_unsupp_fd(&p, lfd, "reg", link.name + 1, e); + return do_dump_gen_file(&p, lfd, ops, e); } if (S_ISFIFO(p.stat.st_mode)) { @@ -1721,6 +1726,9 @@ static int collect_one_file(void *o, ProtobufCMessage *base, struct cr_img *i) case FD_TYPES__TTY: ret = collect_one_file_entry(fe, fe->tty->id, &fe->tty->base, &tty_cinfo); break; + case FD_TYPES__MEMFD: + ret = 
collect_one_file_entry(fe, fe->memfd->id, &fe->memfd->base, &memfd_cinfo); + break; } return ret; diff --git a/criu/image-desc.c b/criu/image-desc.c index ae5d817fea..b538a76ea5 100644 --- a/criu/image-desc.c +++ b/criu/image-desc.c @@ -66,6 +66,7 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = { FD_ENTRY(FS, "fs-%u"), FD_ENTRY(REMAP_FPATH, "remap-fpath"), FD_ENTRY_F(GHOST_FILE, "ghost-file-%x", O_NOBUF), + FD_ENTRY_F(MEMFD_INODE, "memfd-%u", O_NOBUF), FD_ENTRY(TCP_STREAM, "tcp-stream-%x"), FD_ENTRY(MNTS, "mountpoints-%u"), FD_ENTRY(NETDEV, "netdev-%u"), diff --git a/criu/include/image-desc.h b/criu/include/image-desc.h index 6db8bf94f5..9ca9643a1c 100644 --- a/criu/include/image-desc.h +++ b/criu/include/image-desc.h @@ -106,6 +106,8 @@ enum { CR_FD_FIFO, CR_FD_PIPES, CR_FD_TTY_FILES, + CR_FD_MEMFD_FILE, + CR_FD_MEMFD_INODE, CR_FD_AUTOFS, diff --git a/criu/include/magic.h b/criu/include/magic.h index 1a583f4ed7..bdaca968d2 100644 --- a/criu/include/magic.h +++ b/criu/include/magic.h @@ -94,6 +94,7 @@ #define BINFMT_MISC_MAGIC 0x67343323 /* Apatity */ #define AUTOFS_MAGIC 0x49353943 /* Sochi */ #define FILES_MAGIC 0x56303138 /* Toropets */ +#define MEMFD_INODE_MAGIC 0x48453499 /* Dnipro */ #define IFADDR_MAGIC RAW_IMAGE_MAGIC #define ROUTE_MAGIC RAW_IMAGE_MAGIC diff --git a/criu/include/memfd.h b/criu/include/memfd.h new file mode 100644 index 0000000000..c1d7949cb7 --- /dev/null +++ b/criu/include/memfd.h @@ -0,0 +1,24 @@ +#ifndef __CR_MEMFD_H__ +#define __CR_MEMFD_H__ + +#include +#include "int.h" +#include "common/config.h" + +extern int is_memfd(dev_t dev, const char *path); +extern const struct fdtype_ops memfd_dump_ops; + +extern struct collect_image_info memfd_cinfo; + +#ifdef CONFIG_HAS_MEMFD_CREATE +# include +#else +# include +# include +static inline int memfd_create(const char *name, unsigned int flags) +{ + return syscall(SYS_memfd_create, name, flags); +} +#endif /* CONFIG_HAS_MEMFD_CREATE */ + +#endif /* __CR_MEMFD_H__ */ diff --git 
a/criu/include/protobuf-desc.h b/criu/include/protobuf-desc.h index 21ba27193f..d725d199ee 100644 --- a/criu/include/protobuf-desc.h +++ b/criu/include/protobuf-desc.h @@ -65,6 +65,8 @@ enum { PB_LOCAL_IMAGE, /* Header for reading/writing images from/to proxy or cache. */ PB_LOCAL_IMAGE_REPLY, /* Header for reading/writing images reply. */ PB_SNAPSHOT_ID, /* Contains a single id. Used for reading/writing ids from proxy or cache. */ + PB_MEMFD_FILE, + PB_MEMFD_INODE, /* 60 */ /* PB_AUTOGEN_STOP */ diff --git a/criu/include/shmem.h b/criu/include/shmem.h index 04ab8d0763..9afdb799af 100644 --- a/criu/include/shmem.h +++ b/criu/include/shmem.h @@ -13,8 +13,11 @@ extern int collect_sysv_shmem(unsigned long shmid, unsigned long size); extern int cr_dump_shmem(void); extern int add_shmem_area(pid_t pid, VmaEntry *vma, u64 *map); extern int fixup_sysv_shmems(void); +extern int dump_one_memfd_shmem(int fd, unsigned long shmid, unsigned long size); extern int dump_one_sysv_shmem(void *addr, unsigned long size, unsigned long shmid); extern int restore_sysv_shmem_content(void *addr, unsigned long size, unsigned long shmid); +extern int restore_memfd_shmem_content(int fd, unsigned long shmid, unsigned long size); + #define SYSV_SHMEM_SKIP_FD (0x7fffffff) diff --git a/criu/kerndat.c b/criu/kerndat.c index e0b5731d54..8ac83820b9 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -42,6 +42,7 @@ #include "vdso.h" #include "kcmp.h" #include "sched.h" +#include "memfd.h" struct kerndat_s kdat = { }; @@ -409,7 +410,7 @@ static bool kerndat_has_memfd_create(void) { int ret; - ret = syscall(SYS_memfd_create, NULL, 0); + ret = memfd_create(NULL, 0); if (ret == -1 && errno == ENOSYS) kdat.has_memfd = false; diff --git a/criu/memfd.c b/criu/memfd.c new file mode 100644 index 0000000000..bcef35e754 --- /dev/null +++ b/criu/memfd.c @@ -0,0 +1,350 @@ +#include + +#include "common/compiler.h" +#include "common/lock.h" +#include "memfd.h" +#include "fdinfo.h" +#include "imgset.h" +#include 
"image.h" +#include "util.h" +#include "log.h" +#include "files.h" +#include "fs-magic.h" +#include "kerndat.h" +#include "files-reg.h" +#include "rst-malloc.h" +#include "fdstore.h" +#include "file-ids.h" +#include "namespaces.h" +#include "shmem.h" + +#include "protobuf.h" +#include "images/memfd.pb-c.h" + +#define MEMFD_PREFIX "/memfd:" +#define MEMFD_PREFIX_LEN (sizeof(MEMFD_PREFIX)-1) + +struct memfd_inode { + struct list_head list; + u32 id; + union { + /* Only for dump */ + struct { + u32 dev; + u32 ino; + }; + /* Only for restore */ + struct { + mutex_t lock; + int fdstore_id; + }; + }; +}; + +static LIST_HEAD(memfd_inodes); + +/* + * Dump only + */ + +static u32 memfd_inode_ids = 1; + +int is_memfd(dev_t dev, const char *path) +{ + /* + * TODO When MAP_HUGETLB is used, the file device is not shmem_dev, + * Note that other parts of CRIU have similar issues, see + * is_anon_shmem_map(). + */ + return dev == kdat.shmem_dev && + !strncmp(path, MEMFD_PREFIX, MEMFD_PREFIX_LEN); +} + +static int dump_memfd_inode(int fd, struct memfd_inode *inode, + const char *name, const struct stat *st) +{ + int ret = -1; + struct cr_img *img = NULL; + MemfdInodeEntry mie = MEMFD_INODE_ENTRY__INIT; + u32 shmid; + + /* + * shmids are chosen as the inode number of the corresponding mmaped + * file. See handle_vma() in proc_parse.c. + * It works for memfd too, because we share the same device as the + * shmem device. 
+ */ + shmid = inode->ino; + + pr_info("Dumping memfd:%s contents (id %#x, shmid: %#x, size: %"PRIu64")\n", + name, inode->id, shmid, st->st_size); + + if (dump_one_memfd_shmem(fd, shmid, st->st_size) < 0) + goto out; + + img = open_image(CR_FD_MEMFD_INODE, O_DUMP, inode->id); + if (!img) + goto out; + + mie.uid = userns_uid(st->st_uid); + mie.gid = userns_gid(st->st_gid); + mie.name = (char *)name; + mie.size = st->st_size; + mie.shmid = shmid; + + if (pb_write_one(img, &mie, PB_MEMFD_INODE)) + goto out; + + ret = 0; + +out: + if (img) + close_image(img); + return ret; +} + +static struct memfd_inode *dump_unique_memfd_inode(int lfd, const char *name, const struct stat *st) +{ + struct memfd_inode *inode; + + list_for_each_entry(inode, &memfd_inodes, list) + if ((inode->dev == st->st_dev) && (inode->ino == st->st_ino)) + return inode; + + inode = xmalloc(sizeof(*inode)); + if (inode == NULL) + return NULL; + + inode->dev = st->st_dev; + inode->ino = st->st_ino; + inode->id = memfd_inode_ids++; + + if (dump_memfd_inode(lfd, inode, name, st)) { + xfree(inode); + return NULL; + } + + list_add_tail(&inode->list, &memfd_inodes); + + return inode; +} + +static int dump_one_memfd(int lfd, u32 id, const struct fd_parms *p) +{ + MemfdFileEntry mfe = MEMFD_FILE_ENTRY__INIT; + FileEntry fe = FILE_ENTRY__INIT; + struct memfd_inode *inode; + struct fd_link _link, *link; + const char *name; + + if (!p->link) { + if (fill_fdlink(lfd, p, &_link)) + return -1; + link = &_link; + } else + link = p->link; + + strip_deleted(link); + name = &link->name[1+MEMFD_PREFIX_LEN]; + + inode = dump_unique_memfd_inode(lfd, name, &p->stat); + if (!inode) + return -1; + + mfe.id = id; + mfe.flags = p->flags; + mfe.pos = p->pos; + mfe.fown = (FownEntry *)&p->fown; + mfe.inode_id = inode->id; + + fe.type = FD_TYPES__MEMFD; + fe.id = mfe.id; + fe.memfd = &mfe; + + return pb_write_one(img_from_set(glob_imgset, CR_FD_FILES), &fe, PB_FILE); +} + +const struct fdtype_ops memfd_dump_ops = { + .type = 
FD_TYPES__MEMFD, + .dump = dump_one_memfd, +}; + + +/* + * Restore only + */ + +struct memfd_info { + MemfdFileEntry *mfe; + struct file_desc d; + struct memfd_inode *inode; +}; + +static struct memfd_inode *memfd_alloc_inode(int id) +{ + struct memfd_inode *inode; + + list_for_each_entry(inode, &memfd_inodes, list) + if (inode->id == id) + return inode; + + inode = shmalloc(sizeof(*inode)); + if (!inode) + return NULL; + + inode->id = id; + mutex_init(&inode->lock); + inode->fdstore_id = -1; + + list_add_tail(&inode->list, &memfd_inodes); + return inode; +} + +extern int restore_memfd_shm(int fd, u64 id, u64 size); +static int memfd_open_inode_nocache(struct memfd_inode *inode) +{ + MemfdInodeEntry *mie = NULL; + struct cr_img *img = NULL; + int fd = -1; + int ret = -1; + int flags; + + img = open_image(CR_FD_MEMFD_INODE, O_RSTR, inode->id); + if (!img) + goto out; + + if (pb_read_one(img, &mie, PB_MEMFD_INODE) < 0) + goto out; + + fd = memfd_create(mie->name, 0); + if (fd < 0) { + pr_perror("Can't create memfd:%s", mie->name); + goto out; + } + + if (restore_memfd_shmem_content(fd, mie->shmid, mie->size)) + goto out; + + if (fchown(fd, mie->uid, mie->gid)) { + pr_perror("Can't change uid %d gid %d of memfd:%s", + (int)mie->uid, (int)mie->gid, mie->name); + goto out; + } + + inode->fdstore_id = fdstore_add(fd); + if (inode->fdstore_id < 0) + goto out; + + ret = fd; + fd = -1; + +out: + if (fd != -1) + close(fd); + if (img) + close_image(img); + if (mie) + memfd_inode_entry__free_unpacked(mie, NULL); + return ret; +} + +static int memfd_open_inode(struct memfd_inode *inode) +{ + int fd; + + if (inode->fdstore_id != -1) + return fdstore_get(inode->fdstore_id); + + mutex_lock(&inode->lock); + if (inode->fdstore_id != -1) + fd = fdstore_get(inode->fdstore_id); + else + fd = memfd_open_inode_nocache(inode); + mutex_unlock(&inode->lock); + + return fd; +} + +static int memfd_open(struct file_desc *d, u32 *fdflags) +{ + char lpath[PSFDS]; + struct memfd_info *mfi; + 
MemfdFileEntry *mfe; + int fd, _fd; + u32 flags; + + mfi = container_of(d, struct memfd_info, d); + mfe = mfi->mfe; + + pr_info("Restoring memfd id=%d\n", mfe->id); + + fd = memfd_open_inode(mfi->inode); + if (fd < 0) + goto err; + + /* Reopen the fd with original permissions */ + sprintf(lpath, "/proc/self/fd/%d", fd); + flags = fdflags ? *fdflags : mfe->flags; + /* + * Ideally we should call compat version open() to not force the + * O_LARGEFILE file flag with regular open(). It doesn't seem that + * important though. + */ + _fd = open(lpath, flags); + if (_fd < 0) { + pr_perror("Can't reopen memfd id=%d", mfe->id); + goto err; + } + close(fd); + fd = _fd; + + if (restore_fown(fd, mfe->fown) < 0) + goto err; + + if (lseek(fd, mfe->pos, SEEK_SET) < 0) { + pr_perror("Can't restore file position of memfd id=%d", mfe->id); + goto err; + } + + return fd; + +err: + if (fd >= 0) + close(fd); + return -1; +} + +static int memfd_open_fe_fd(struct file_desc *fd, int *new_fd) +{ + int tmp; + + tmp = memfd_open(fd, NULL); + if (tmp < 0) + return -1; + *new_fd = tmp; + return 0; +} + +static struct file_desc_ops memfd_desc_ops = { + .type = FD_TYPES__MEMFD, + .open = memfd_open_fe_fd, +}; + +static int collect_one_memfd(void *o, ProtobufCMessage *msg, struct cr_img *i) +{ + struct memfd_info *info = o; + + info->mfe = pb_msg(msg, MemfdFileEntry); + info->inode = memfd_alloc_inode(info->mfe->inode_id); + if (!info->inode) + return -1; + + return file_desc_add(&info->d, info->mfe->id, &memfd_desc_ops); +} + +struct collect_image_info memfd_cinfo = { + .fd_type = CR_FD_MEMFD_FILE, + .pb_type = PB_MEMFD_FILE, + .priv_size = sizeof(struct memfd_info), + .collect = collect_one_memfd, +}; diff --git a/criu/shmem.c b/criu/shmem.c index 6978621fe3..76b537d9ed 100644 --- a/criu/shmem.c +++ b/criu/shmem.c @@ -23,6 +23,7 @@ #include "types.h" #include "page.h" #include "util.h" +#include "memfd.h" #include "protobuf.h" #include "images/pagemap.pb-c.h" @@ -490,7 +491,7 @@ static int 
do_restore_shmem_content(void *addr, unsigned long size, unsigned lon return ret; } -static int restore_shmem_content(void *addr, struct shmem_info *si) +int restore_shmem_content(void *addr, struct shmem_info *si) { return do_restore_shmem_content(addr, si->size, si->shmid); } @@ -500,6 +501,41 @@ int restore_sysv_shmem_content(void *addr, unsigned long size, unsigned long shm return do_restore_shmem_content(addr, round_up(size, PAGE_SIZE), shmid); } +int restore_memfd_shmem_content(int fd, unsigned long shmid, unsigned long size) +{ + void *addr = NULL; + int ret = 1; + + if (size == 0) + return 0; + + if (ftruncate(fd, size) < 0) { + pr_perror("Can't resize shmem 0x%lx size=%ld", shmid, size); + goto out; + } + + addr = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0); + if (addr == MAP_FAILED) { + pr_perror("Can't mmap shmem 0x%lx size=%ld", shmid, size); + goto out; + } + + /* + * do_restore_shmem_content needs size to be page aligned. + */ + if (do_restore_shmem_content(addr, round_up(size, PAGE_SIZE), shmid) < 0) { + pr_err("Can't restore shmem content\n"); + goto out; + } + + ret = 0; + +out: + if (addr) + munmap(addr, size); + return ret; +} + static int open_shmem(int pid, struct vma_area *vma) { VmaEntry *vi = vma->e; @@ -532,7 +568,7 @@ static int open_shmem(int pid, struct vma_area *vma) flags = MAP_SHARED; if (kdat.has_memfd) { - f = syscall(SYS_memfd_create, "", 0); + f = memfd_create("", 0); if (f < 0) { pr_perror("Unable to create memfd"); goto err; @@ -779,6 +815,32 @@ static int dump_one_shmem(struct shmem_info *si) return ret; } +int dump_one_memfd_shmem(int fd, unsigned long shmid, unsigned long size) +{ + int ret = -1; + void *addr; + struct shmem_info si; + + if (size == 0) + return 0; + + memset(&si, 0, sizeof(si)); + si.shmid = shmid; + si.size = size; + + addr = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); + if (addr == MAP_FAILED) { + pr_perror("Can't mmap shmem 0x%lx", shmid); + goto err; + } + + ret = 
do_dump_one_shmem(fd, addr, &si); + + munmap(addr, size); +err: + return ret; +} + int dump_one_sysv_shmem(void *addr, unsigned long size, unsigned long shmid) { int fd, ret; diff --git a/images/Makefile b/images/Makefile index 4de6990b46..fba86b3058 100644 --- a/images/Makefile +++ b/images/Makefile @@ -64,6 +64,7 @@ proto-obj-y += autofs.o proto-obj-y += macvlan.o proto-obj-y += sit.o proto-obj-y += remote-image.o +proto-obj-y += memfd.o CFLAGS += -iquote $(obj)/ diff --git a/images/fdinfo.proto b/images/fdinfo.proto index 77e375aa94..d966d5bc5b 100644 --- a/images/fdinfo.proto +++ b/images/fdinfo.proto @@ -16,6 +16,7 @@ import "sk-unix.proto"; import "fifo.proto"; import "pipe.proto"; import "tty.proto"; +import "memfd.proto"; enum fd_types { UND = 0; @@ -36,6 +37,7 @@ enum fd_types { TUNF = 15; EXT = 16; TIMERFD = 17; + MEMFD = 18; /* Any number above the real used. Not stored to image */ CTL_TTY = 65534; @@ -70,4 +72,5 @@ message file_entry { optional fifo_entry fifo = 17; optional pipe_entry pipe = 18; optional tty_file_entry tty = 19; + optional memfd_file_entry memfd = 20; } diff --git a/images/memfd.proto b/images/memfd.proto new file mode 100644 index 0000000000..8eccd6f4fb --- /dev/null +++ b/images/memfd.proto @@ -0,0 +1,20 @@ +syntax = "proto2"; + +import "opts.proto"; +import "fown.proto"; + +message memfd_file_entry { + required uint32 id = 1; + required uint32 flags = 2 [(criu).flags = "rfile.flags"]; + required uint64 pos = 3; + required fown_entry fown = 4; + required uint32 inode_id = 5; +}; + +message memfd_inode_entry { + required string name = 1; + required uint32 uid = 2; + required uint32 gid = 3; + required uint64 size = 4; + required uint32 shmid = 5; +}; diff --git a/lib/py/images/images.py b/lib/py/images/images.py index 3eedfca69d..dca080657a 100644 --- a/lib/py/images/images.py +++ b/lib/py/images/images.py @@ -522,6 +522,8 @@ def skip(self, f, pbuff): 'AUTOFS': entry_handler(pb.autofs_entry), 'FILES': entry_handler(pb.file_entry), 
'CPUINFO': entry_handler(pb.cpuinfo_entry), + 'MEMFD_FILE': entry_handler(pb.memfd_file_entry), + 'MEMFD_INODE': entry_handler(pb.memfd_inode_entry), } diff --git a/scripts/feature-tests.mak b/scripts/feature-tests.mak index 6f67c6035f..21b3900923 100644 --- a/scripts/feature-tests.mak +++ b/scripts/feature-tests.mak @@ -171,3 +171,14 @@ int main(int argc, char **argv) } endef + +define FEATURE_TEST_MEMFD_CREATE + +#include +#include + +int main(void) +{ + return memfd_create(NULL, 0); +} +endef From 1386dbbff6197f57f021b7b2b4a657079e564659 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Wed, 18 Dec 2019 23:32:32 +0000 Subject: [PATCH 209/277] files: increase path buffer size in inherited_fd() Prepare memfd to use inherited_fd(), needing long path names support. Signed-off-by: Nicolas Viennot --- criu/files.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/files.c b/criu/files.c index ea86deaa37..789b08a4cd 100644 --- a/criu/files.c +++ b/criu/files.c @@ -1608,7 +1608,7 @@ int inherit_fd_lookup_id(char *id) bool inherited_fd(struct file_desc *d, int *fd_p) { - char buf[32], *id_str; + char buf[PATH_MAX], *id_str; int i_fd; if (!d->ops->name) From 883d3e46ffc984182bf4b9a644ea7903c41f5b27 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Wed, 18 Dec 2019 23:32:32 +0000 Subject: [PATCH 210/277] memfd: add --inherit-fd support Upon file restore, inherited_fd() is called to check for a user-defined inerit-fd override. Note that the MEMFD_INODE image is read at each invocation (memfd name is not cached). 
Signed-off-by: Nicolas Viennot --- criu/crtools.c | 1 + criu/memfd.c | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/criu/crtools.c b/criu/crtools.c index 064aa7399a..a9910a7c01 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -415,6 +415,7 @@ int main(int argc, char *argv[], char *envp[]) " pipe[inode]\n" " socket[inode]\n" " file[mnt_id:inode]\n" +" /memfd:name\n" " path/to/file\n" " --empty-ns net Create a namespace, but don't restore its properties\n" " (assuming it will be restored by action scripts)\n" diff --git a/criu/memfd.c b/criu/memfd.c index bcef35e754..36b3be8dfb 100644 --- a/criu/memfd.c +++ b/criu/memfd.c @@ -276,6 +276,9 @@ static int memfd_open(struct file_desc *d, u32 *fdflags) mfi = container_of(d, struct memfd_info, d); mfe = mfi->mfe; + if (inherited_fd(d, &fd)) + return fd; + pr_info("Restoring memfd id=%d\n", mfe->id); fd = memfd_open_inode(mfi->inode); @@ -325,9 +328,42 @@ static int memfd_open_fe_fd(struct file_desc *fd, int *new_fd) return 0; } +static char *memfd_d_name(struct file_desc *d, char *buf, size_t s) +{ + MemfdInodeEntry *mie = NULL; + struct cr_img *img = NULL; + struct memfd_info *mfi; + char *ret = NULL; + + mfi = container_of(d, struct memfd_info, d); + + img = open_image(CR_FD_MEMFD_INODE, O_RSTR, mfi->inode->id); + if (!img) + goto out; + + if (pb_read_one(img, &mie, PB_MEMFD_INODE) < 0) + goto out; + + if (snprintf(buf, s, "%s%s", MEMFD_PREFIX, mie->name) >= s) { + pr_err("Buffer too small for memfd name %s\n", mie->name); + goto out; + } + + ret = buf; + +out: + if (img) + close_image(img); + if (mie) + memfd_inode_entry__free_unpacked(mie, NULL); + + return ret; +} + static struct file_desc_ops memfd_desc_ops = { .type = FD_TYPES__MEMFD, .open = memfd_open_fe_fd, + .name = memfd_d_name, }; static int collect_one_memfd(void *o, ProtobufCMessage *msg, struct cr_img *i) From 8c006c28f307138530f5ee5f4b76ea33dc8ff325 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Wed, 18 
Dec 2019 23:32:32 +0000 Subject: [PATCH 211/277] memfd: add memory mapping support * During checkpoint, we add a vma flags: VMA_AREA_MEMFD to denote memfd regions. * Even though memfd is backed by the shmem device, we use the file semantics of memfd (via /proc/map_files/) which we already have support for. Signed-off-by: Nicolas Viennot --- criu/cr-dump.c | 6 +++++- criu/files-reg.c | 11 +++++++++-- criu/include/image.h | 1 + criu/include/memfd.h | 6 ++++++ criu/memfd.c | 19 ++++++++++++++++++- criu/proc_parse.c | 35 +++++++++++++++++++++++++++++++++++ 6 files changed, 74 insertions(+), 4 deletions(-) diff --git a/criu/cr-dump.c b/criu/cr-dump.c index 0c9b6d6c7d..a8188724f2 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -81,6 +81,7 @@ #include "dump.h" #include "eventpoll.h" #include "img-remote.h" +#include "memfd.h" /* * Architectures can overwrite this function to restore register sets that @@ -415,7 +416,10 @@ static int dump_filemap(struct vma_area *vma_area, int fd) /* Flags will be set during restore in open_filmap() */ - ret = dump_one_reg_file_cond(fd, &id, &p); + if (vma->status & VMA_AREA_MEMFD) + ret = dump_one_memfd_cond(fd, &id, &p); + else + ret = dump_one_reg_file_cond(fd, &id, &p); vma->shmid = id; return ret; diff --git a/criu/files-reg.c b/criu/files-reg.c index 90a90024f2..e07bf4ad18 100644 --- a/criu/files-reg.c +++ b/criu/files-reg.c @@ -35,6 +35,7 @@ #include "pstree.h" #include "fault-injection.h" #include "external.h" +#include "memfd.h" #include "protobuf.h" #include "util.h" @@ -1920,7 +1921,10 @@ static int open_filemap(int pid, struct vma_area *vma) flags = vma->e->fdflags; if (ctx.flags != flags || ctx.desc != vma->vmfd) { - ret = open_path(vma->vmfd, do_open_reg_noseek_flags, &flags); + if (vma->e->status & VMA_AREA_MEMFD) + ret = memfd_open(vma->vmfd, &flags); + else + ret = open_path(vma->vmfd, do_open_reg_noseek_flags, &flags); if (ret < 0) return ret; @@ -1950,7 +1954,10 @@ int collect_filemap(struct vma_area *vma) 
vma->e->fdflags = O_RDONLY; } - fd = collect_special_file(vma->e->shmid); + if (vma->e->status & VMA_AREA_MEMFD) + fd = collect_memfd(vma->e->shmid); + else + fd = collect_special_file(vma->e->shmid); if (!fd) return -1; diff --git a/criu/include/image.h b/criu/include/image.h index 2baa394960..1c7cc54718 100644 --- a/criu/include/image.h +++ b/criu/include/image.h @@ -83,6 +83,7 @@ #define VMA_AREA_SOCKET (1 << 11) #define VMA_AREA_VVAR (1 << 12) #define VMA_AREA_AIORING (1 << 13) +#define VMA_AREA_MEMFD (1 << 14) #define VMA_CLOSE (1 << 28) #define VMA_NO_PROT_WRITE (1 << 29) diff --git a/criu/include/memfd.h b/criu/include/memfd.h index c1d7949cb7..0a9aeff2fa 100644 --- a/criu/include/memfd.h +++ b/criu/include/memfd.h @@ -5,10 +5,16 @@ #include "int.h" #include "common/config.h" +struct fd_parms; +struct file_desc; + extern int is_memfd(dev_t dev, const char *path); +extern int dump_one_memfd_cond(int lfd, u32 *id, struct fd_parms *parms); extern const struct fdtype_ops memfd_dump_ops; +extern int memfd_open(struct file_desc *d, u32 *fdflags); extern struct collect_image_info memfd_cinfo; +extern struct file_desc *collect_memfd(u32 id); #ifdef CONFIG_HAS_MEMFD_CREATE # include diff --git a/criu/memfd.c b/criu/memfd.c index 36b3be8dfb..1cca96a323 100644 --- a/criu/memfd.c +++ b/criu/memfd.c @@ -164,6 +164,13 @@ static int dump_one_memfd(int lfd, u32 id, const struct fd_parms *p) return pb_write_one(img_from_set(glob_imgset, CR_FD_FILES), &fe, PB_FILE); } +int dump_one_memfd_cond(int lfd, u32 *id, struct fd_parms *parms) +{ + if (fd_id_generate_special(parms, id)) + return dump_one_memfd(lfd, *id, parms); + return 0; +} + const struct fdtype_ops memfd_dump_ops = { .type = FD_TYPES__MEMFD, .dump = dump_one_memfd, @@ -265,7 +272,7 @@ static int memfd_open_inode(struct memfd_inode *inode) return fd; } -static int memfd_open(struct file_desc *d, u32 *fdflags) +int memfd_open(struct file_desc *d, u32 *fdflags) { char lpath[PSFDS]; struct memfd_info *mfi; @@ -384,3 
+391,13 @@ struct collect_image_info memfd_cinfo = { .priv_size = sizeof(struct memfd_info), .collect = collect_one_memfd, }; + +struct file_desc *collect_memfd(u32 id) { + struct file_desc *fdesc; + + fdesc = find_file_desc_raw(FD_TYPES__MEMFD, id); + if (fdesc == NULL) + pr_err("No entry for memfd %#x\n", id); + + return fdesc; +} diff --git a/criu/proc_parse.c b/criu/proc_parse.c index fa7644992b..468afcdf38 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -41,6 +41,7 @@ #include "timerfd.h" #include "path.h" #include "fault-injection.h" +#include "memfd.h" #include "protobuf.h" #include "images/fdinfo.pb-c.h" @@ -303,6 +304,26 @@ static int vma_get_mapfile_user(const char *fname, struct vma_area *vma, } vfi_dev = makedev(vfi->dev_maj, vfi->dev_min); + + if (is_memfd(vfi_dev, fname)) { + struct fd_link link; + link.len = strlen(fname); + strlcpy(link.name, fname, sizeof(link.name)); + strip_deleted(&link); + + /* + * The error EPERM will be shown in the following pr_perror(). + * It comes from the previous open() call. + */ + pr_perror("Can't open mapped [%s]", link.name); + + /* + * TODO Perhaps we could do better than failing and dump the + * memory like what is being done in shmem.c + */ + return -1; + } + if (is_anon_shmem_map(vfi_dev)) { if (!(vma->e->flags & MAP_SHARED)) return -1; @@ -578,7 +599,20 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area, /* * /dev/zero stands for anon-shared mapping * otherwise it's some file mapping. + * + * We treat memfd mappings as regular file mappings because + * their backing can be seen as files, which is easy to + * support. So even though memfd is an anonymous shmem, we + * treat it differently. + * Note: maybe we should revisit this as /proc/map_files/ + * may not always be accessible. 
*/ + + if (is_memfd(st_buf->st_dev, file_path)) { + vma_area->e->status |= VMA_AREA_MEMFD; + goto normal_file; + } + if (is_anon_shmem_map(st_buf->st_dev)) { if (!(vma_area->e->flags & MAP_SHARED)) goto err_bogus_mapping; @@ -594,6 +628,7 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area, vma_area->e->shmid += FI_HUGE_ANON_SHMEM_ID_BASE; } } else { +normal_file: if (vma_area->e->flags & MAP_PRIVATE) vma_area->e->status |= VMA_FILE_PRIVATE; else From 3eb95b50c543cff5aca4a6500597c6c0bf5e8921 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Wed, 18 Dec 2019 23:32:32 +0000 Subject: [PATCH 212/277] memfd: add seals support See "man fcntl" for more information about seals. memfd are the only files that can be sealed, currently. For this reason, we dump the seal values in the MEMFD_INODE image. Restoring seals must be done carefully as the seal F_SEAL_FUTURE_WRITE prevents future write access. This means that any memory mapping with write access must be restored before restoring the seals. Signed-off-by: Nicolas Viennot --- criu/cr-restore.c | 4 +++ criu/include/fcntl.h | 8 ++++++ criu/include/memfd.h | 1 + criu/memfd.c | 59 +++++++++++++++++++++++++++++++++++++++- images/memfd.proto | 1 + lib/py/images/pb2dict.py | 9 ++++++ 6 files changed, 81 insertions(+), 1 deletion(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index d72bf995f0..76b2a81159 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -2232,6 +2232,10 @@ static int restore_root_task(struct pstree_item *init) if (ret < 0) goto out_kill; + ret = apply_memfd_seals(); + if (ret < 0) + goto out_kill; + /* * Zombies die after CR_STATE_RESTORE which is switched * by root task, not by us. 
See comment before CR_STATE_FORKING diff --git a/criu/include/fcntl.h b/criu/include/fcntl.h index d9c5c5e7b3..ea9d48c72f 100644 --- a/criu/include/fcntl.h +++ b/criu/include/fcntl.h @@ -34,6 +34,14 @@ struct f_owner_ex { # define F_GETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 8) #endif +#ifndef F_ADD_SEALS +# define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) +#endif + +#ifndef F_GET_SEALS +# define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) +#endif + #ifndef O_PATH # define O_PATH 010000000 #endif diff --git a/criu/include/memfd.h b/criu/include/memfd.h index 0a9aeff2fa..2d8eda5458 100644 --- a/criu/include/memfd.h +++ b/criu/include/memfd.h @@ -15,6 +15,7 @@ extern const struct fdtype_ops memfd_dump_ops; extern int memfd_open(struct file_desc *d, u32 *fdflags); extern struct collect_image_info memfd_cinfo; extern struct file_desc *collect_memfd(u32 id); +extern int apply_memfd_seals(void); #ifdef CONFIG_HAS_MEMFD_CREATE # include diff --git a/criu/memfd.c b/criu/memfd.c index 1cca96a323..d17c10fb75 100644 --- a/criu/memfd.c +++ b/criu/memfd.c @@ -1,4 +1,5 @@ #include +#include #include "common/compiler.h" #include "common/lock.h" @@ -24,6 +25,13 @@ #define MEMFD_PREFIX "/memfd:" #define MEMFD_PREFIX_LEN (sizeof(MEMFD_PREFIX)-1) +#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ +#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ +#define F_SEAL_GROW 0x0004 /* prevent file from growing */ +#define F_SEAL_WRITE 0x0008 /* prevent writes */ +/* Linux 5.1+ */ +#define F_SEAL_FUTURE_WRITE 0x0010 /* prevent future writes while mapped */ + struct memfd_inode { struct list_head list; u32 id; @@ -37,6 +45,7 @@ struct memfd_inode { struct { mutex_t lock; int fdstore_id; + unsigned int pending_seals; }; }; }; @@ -92,6 +101,10 @@ static int dump_memfd_inode(int fd, struct memfd_inode *inode, mie.size = st->st_size; mie.shmid = shmid; + mie.seals = fcntl(fd, F_GET_SEALS); + if (mie.seals == -1) + goto out; + if (pb_write_one(img, &mie, PB_MEMFD_INODE)) goto 
out; @@ -187,6 +200,8 @@ struct memfd_info { struct memfd_inode *inode; }; +static int memfd_open_inode(struct memfd_inode *inode); + static struct memfd_inode *memfd_alloc_inode(int id) { struct memfd_inode *inode; @@ -202,6 +217,7 @@ static struct memfd_inode *memfd_alloc_inode(int id) inode->id = id; mutex_init(&inode->lock); inode->fdstore_id = -1; + inode->pending_seals = 0; list_add_tail(&inode->list, &memfd_inodes); return inode; @@ -223,7 +239,16 @@ static int memfd_open_inode_nocache(struct memfd_inode *inode) if (pb_read_one(img, &mie, PB_MEMFD_INODE) < 0) goto out; - fd = memfd_create(mie->name, 0); + if (mie->seals == F_SEAL_SEAL) { + inode->pending_seals = 0; + flags = 0; + } else { + /* Seals are applied later due to F_SEAL_FUTURE_WRITE */ + inode->pending_seals = mie->seals; + flags = MFD_ALLOW_SEALING; + } + + fd = memfd_create(mie->name, flags); if (fd < 0) { pr_perror("Can't create memfd:%s", mie->name); goto out; @@ -401,3 +426,35 @@ struct file_desc *collect_memfd(u32 id) { return fdesc; } + +int apply_memfd_seals(void) +{ + /* + * We apply the seals after all the mappings are done because the seal + * F_SEAL_FUTURE_WRITE prevents future write access (added in + * Linux 5.1). Thus we must make sure all writable mappings are opened + * before applying this seal. 
+ */ + + int ret, fd; + struct memfd_inode *inode; + + list_for_each_entry(inode, &memfd_inodes, list) { + if (!inode->pending_seals) + continue; + + fd = memfd_open_inode(inode); + if (fd < 0) + return -1; + + ret = fcntl(fd, F_ADD_SEALS, inode->pending_seals); + close(fd); + + if (ret < 0) { + pr_perror("Cannot apply seals on memfd"); + return -1; + } + } + + return 0; +} diff --git a/images/memfd.proto b/images/memfd.proto index 8eccd6f4fb..546ffc2ab8 100644 --- a/images/memfd.proto +++ b/images/memfd.proto @@ -17,4 +17,5 @@ message memfd_inode_entry { required uint32 gid = 3; required uint64 size = 4; required uint32 shmid = 5; + required uint32 seals = 6 [(criu).flags = "seals.flags"]; }; diff --git a/lib/py/images/pb2dict.py b/lib/py/images/pb2dict.py index 6fce4be22d..0cf80aa3ae 100644 --- a/lib/py/images/pb2dict.py +++ b/lib/py/images/pb2dict.py @@ -123,6 +123,14 @@ def _custom_conv(field): ('O_CLOEXEC', 0o02000000), ] +seals_flags_map = [ + ('F_SEAL_SEAL', 0x0001), + ('F_SEAL_SHRINK', 0x0002), + ('F_SEAL_GROW', 0x0004), + ('F_SEAL_WRITE', 0x0008), + ('F_SEAL_FUTURE_WRITE', 0x0010), +] + pmap_flags_map = [ ('PE_PARENT', 1 << 0), ('PE_LAZY', 1 << 1), @@ -135,6 +143,7 @@ def _custom_conv(field): 'mmap.status': mmap_status_map, 'rfile.flags': rfile_flags_map, 'pmap.flags': pmap_flags_map, + 'seals.flags': seals_flags_map, } gen_maps = { From 557b191b003246d9af89f5382a3b103342764db2 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Tue, 4 Feb 2020 16:39:53 +0000 Subject: [PATCH 213/277] inhfd_test: add support for non-pair files File pairs naturally block on read() until the write() happen (or the writer is closed). This is not the case for regular files, so we take extra precaution for these. 
Also cleaned-up an extra my_file.close() Signed-off-by: Nicolas Viennot --- test/zdtm.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/test/zdtm.py b/test/zdtm.py index de6b376884..c154740d18 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -689,9 +689,14 @@ def start(self): i = 0 for _, peer_file in self.__files: msg = self.__get_message(i) - my_file.close() try: - data = peer_file.read(16) + # File pairs naturally block on read() until the write() + # happen (or the writer is closed). This is not the case for + # regular files, so we loop. + data = b'' + while not data: + data = peer_file.read(16) + time.sleep(0.1) except Exception as e: print("Unable to read a peer file: %s" % e) sys.exit(1) From 6cf518277714fcdda5fbc204dde208a91a21af88 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Fri, 20 Dec 2019 21:56:38 -0500 Subject: [PATCH 214/277] memfd: add tests Testing for all the memfd features, namely support for CR of: * the same fd shared by multiple processes * the same file shared by multiple processes * the memfd content * file flags and fd flags * mmaps, MAP_SHARED and MAP_PRIVATE * seals, excluding F_SEAL_FUTURE_WRITE because this feature only exists in recent kernels (5.1 and up) * inherited fd Signed-off-by: Nicolas Viennot --- test/inhfd/memfd.py | 28 +++++++++ test/inhfd/memfd.py.checkskip | 7 +++ test/inhfd/memfd.py.desc | 1 + test/zdtm/static/Makefile | 4 ++ test/zdtm/static/memfd00.c | 103 ++++++++++++++++++++++++++++++ test/zdtm/static/memfd01.c | 114 ++++++++++++++++++++++++++++++++++ test/zdtm/static/memfd02.c | 87 ++++++++++++++++++++++++++ test/zdtm/static/memfd03.c | 97 +++++++++++++++++++++++++++++ 8 files changed, 441 insertions(+) create mode 100755 test/inhfd/memfd.py create mode 100755 test/inhfd/memfd.py.checkskip create mode 100644 test/inhfd/memfd.py.desc create mode 100644 test/zdtm/static/memfd00.c create mode 100644 test/zdtm/static/memfd01.c create mode 100644 test/zdtm/static/memfd02.c create mode 
100644 test/zdtm/static/memfd03.c diff --git a/test/inhfd/memfd.py b/test/inhfd/memfd.py new file mode 100755 index 0000000000..d9ce01e417 --- /dev/null +++ b/test/inhfd/memfd.py @@ -0,0 +1,28 @@ +import os +import ctypes +libc = ctypes.CDLL(None) + + +def memfd_create(name, flags): + return libc.memfd_create(name.encode('utf8'), flags) + + +def create_fds(): + def create_memfd_pair(name): + fd = memfd_create(name, 0) + fw = open('/proc/self/fd/{}'.format(fd), 'wb') + fr = open('/proc/self/fd/{}'.format(fd), 'rb') + os.close(fd) + return (fw, fr) + + return [create_memfd_pair("name{}".format(i)) for i in range(10)] + + +def filename(f): + name = os.readlink('/proc/self/fd/{}'.format(f.fileno())) + name = name.replace(' (deleted)', '') + return name + + +def dump_opts(sockf): + return [] diff --git a/test/inhfd/memfd.py.checkskip b/test/inhfd/memfd.py.checkskip new file mode 100755 index 0000000000..252778969d --- /dev/null +++ b/test/inhfd/memfd.py.checkskip @@ -0,0 +1,7 @@ +#!/usr/bin/env python + +import ctypes +libc = ctypes.CDLL(None) + +# libc may not have memfd_create (e.g., centos on travis) +libc.memfd_create("test".encode('utf8'), 0) diff --git a/test/inhfd/memfd.py.desc b/test/inhfd/memfd.py.desc new file mode 100644 index 0000000000..10666c8232 --- /dev/null +++ b/test/inhfd/memfd.py.desc @@ -0,0 +1 @@ +{ 'flavor': 'h' } diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index 5ca05ee9ef..5afd18cd66 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -220,6 +220,10 @@ TST_NOFILE := \ child_subreaper \ child_subreaper_existing_child \ child_subreaper_and_reparent \ + memfd00 \ + memfd01 \ + memfd02 \ + memfd03 \ # jobctl00 \ ifneq ($(ARCH),arm) diff --git a/test/zdtm/static/memfd00.c b/test/zdtm/static/memfd00.c new file mode 100644 index 0000000000..6b56eca011 --- /dev/null +++ b/test/zdtm/static/memfd00.c @@ -0,0 +1,103 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "memfd file descriptor"; +const char *test_author = "Nicolas Viennot "; + +#define err(exitcode, msg, ...) ({ pr_perror(msg, ##__VA_ARGS__); exit(exitcode); }) + +static int _memfd_create(const char *name, unsigned int flags) +{ + return syscall(SYS_memfd_create, name, flags); +} + +int main(int argc, char *argv[]) +{ + int fd, fl_flags1, fl_flags2, fd_flags1, fd_flags2; + struct statfs statfs1, statfs2; + off_t pos1, pos2; + char buf[5]; + + test_init(argc, argv); + + fd = _memfd_create("somename", MFD_CLOEXEC); + if (fd < 0) + err(1, "Can't call memfd_create"); + + if (fcntl(fd, F_SETFL, O_APPEND) < 0) + err(1, "Can't get fl flags"); + + if ((fl_flags1 = fcntl(fd, F_GETFL)) == -1) + err(1, "Can't get fl flags"); + + if ((fd_flags1 = fcntl(fd, F_GETFD)) == -1) + err(1, "Can't get fd flags"); + + if (fstatfs(fd, &statfs1) < 0) + err(1, "statfs issue"); + + if (write(fd, "hello", 5) != 5) + err(1, "write error"); + + pos1 = 3; + if (lseek(fd, pos1, SEEK_SET) < 0) + err(1, "seek error"); + + test_daemon(); + test_waitsig(); + + if ((fl_flags2 = fcntl(fd, F_GETFL)) == -1) + err(1, "Can't get fl flags"); + + if (fl_flags1 != fl_flags2) { + fail("fl flags differs"); + return 1; + } + + if ((fd_flags2 = fcntl(fd, F_GETFD)) == -1) + err(1, "Can't get fd flags"); + + if (fd_flags1 != fd_flags2) { + fail("fd flags differs"); + return 1; + } + + if (fstatfs(fd, &statfs2) < 0) + err(1, "statfs issue"); + + if (statfs1.f_type != statfs2.f_type) { + fail("statfs.f_type differs"); + return 1; + } + + pos2 = lseek(fd, 0, SEEK_CUR); + if (pos1 != pos2) { + fail("position differs"); + return 1; + } + + if (pread(fd, buf, sizeof(buf), 0) != sizeof(buf)) { + fail("read problem"); + return 1; + } + + if (memcmp(buf, "hello", sizeof(buf))) { + fail("content mismatch"); + return 1; + } + + pass(); + + return 0; +} diff --git a/test/zdtm/static/memfd01.c b/test/zdtm/static/memfd01.c new file mode 100644 index 
0000000000..7a78536422 --- /dev/null +++ b/test/zdtm/static/memfd01.c @@ -0,0 +1,114 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "memfd with different file pointer"; +const char *test_author = "Nicolas Viennot "; + +#define err(exitcode, msg, ...) ({ pr_perror(msg, ##__VA_ARGS__); exit(exitcode); }) + +static int _memfd_create(const char *name, unsigned int flags) +{ + return syscall(SYS_memfd_create, name, flags); +} + +int main(int argc, char *argv[]) +{ + pid_t pid, pid_child; + int fd, ret, status; + task_waiter_t t; + + test_init(argc, argv); + + task_waiter_init(&t); + + fd = _memfd_create("somename", MFD_CLOEXEC); + if (fd < 0) + err(1, "Can't call memfd_create"); + + pid = getpid(); + + pid_child = fork(); + if (pid_child < 0) + err(1, "Can't fork"); + + if (!pid_child) { + char fdpath[100]; + char buf[1]; + int fl_flags1, fl_flags2, fd_flags1, fd_flags2; + + snprintf(fdpath, sizeof(fdpath), "/proc/%d/fd/%d", pid, fd); + /* + * We pass O_LARGEFILE because in compat mode, our file + * descriptor does not get O_LARGEFILE automatically, but the + * restorer using non-compat open() is forced O_LARGEFILE. + * This creates a flag difference, which we don't want to deal + * with this at the moment. 
+ */ + fd = open(fdpath, O_RDONLY | O_LARGEFILE); + if (fd < 0) + err(1, "Can't open memfd via proc"); + + if ((fl_flags1 = fcntl(fd, F_GETFL)) == -1) + err(1, "Can't get fl flags"); + + if ((fd_flags1 = fcntl(fd, F_GETFD)) == -1) + err(1, "Can't get fd flags"); + + task_waiter_complete(&t, 1); + // checkpoint-restore happens here + task_waiter_wait4(&t, 2); + + if (read(fd, buf, 1) != 1) + err(1, "Can't read"); + + if ((fl_flags2 = fcntl(fd, F_GETFL)) == -1) + err(1, "Can't get fl flags"); + + if (fl_flags1 != fl_flags2) + err(1, "fl flags differs"); + + if ((fd_flags2 = fcntl(fd, F_GETFD)) == -1) + err(1, "Can't get fd flags"); + + if (fd_flags1 != fd_flags2) + err(1, "fd flags differs"); + + if (buf[0] != 'x') + err(1, "Read incorrect"); + + return 0; + } + + task_waiter_wait4(&t, 1); + + test_daemon(); + test_waitsig(); + + if (write(fd, "x", 1) != 1) + err(1, "Can't write"); + + task_waiter_complete(&t, 2); + + ret = wait(&status); + if (ret == -1 || !WIFEXITED(status) || WEXITSTATUS(status)) { + kill(pid, SIGKILL); + fail("child had issue"); + return 1; + } + + pass(); + + return 0; +} diff --git a/test/zdtm/static/memfd02.c b/test/zdtm/static/memfd02.c new file mode 100644 index 0000000000..1843e9c9af --- /dev/null +++ b/test/zdtm/static/memfd02.c @@ -0,0 +1,87 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "memfd mmap"; +const char *test_author = "Nicolas Viennot "; + +#define err(exitcode, msg, ...) 
({ pr_perror(msg, ##__VA_ARGS__); exit(exitcode); }) + +static int _memfd_create(const char *name, unsigned int flags) +{ + return syscall(SYS_memfd_create, name, flags); +} + +int main(int argc, char *argv[]) +{ +#define LEN 6 + int fd; + void *addr_shared, *addr_private; + char buf[LEN]; + + test_init(argc, argv); + + fd = _memfd_create("somename", MFD_CLOEXEC); + if (fd < 0) + err(1, "Can't call memfd_create"); + + if (ftruncate(fd, LEN) < 0) + err(1, "Can't truncate"); + + addr_shared = mmap(NULL, LEN, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (addr_shared == MAP_FAILED) + err(1, "Can't mmap"); + + write(fd, "write1", LEN); + + addr_private = mmap(NULL, LEN, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + if (addr_private == MAP_FAILED) + err(1, "Can't mmap"); + + test_daemon(); + test_waitsig(); + + if (memcmp(addr_shared, "write1", LEN)) { + fail("content mismatch (shared)"); + return 1; + } + + strcpy(addr_shared, "write2"); + + if (pread(fd, buf, LEN, 0) != LEN) { + fail("read problem"); + return 1; + } + + if (memcmp(buf, "write2", LEN)) { + fail("content mismatch (shared)"); + return 1; + } + + if (memcmp(addr_private, "write2", LEN)) { + fail("content mismatch (private)"); + return 1; + } + + strcpy(addr_private, "write3"); + + if (memcmp(addr_shared, "write2", LEN)) { + fail("content mismatch (shared)"); + return 1; + } + + pass(); + + return 0; +} diff --git a/test/zdtm/static/memfd03.c b/test/zdtm/static/memfd03.c new file mode 100644 index 0000000000..faedf9383e --- /dev/null +++ b/test/zdtm/static/memfd03.c @@ -0,0 +1,97 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "memfd seals"; +const char *test_author = "Nicolas Viennot "; + +#define err(exitcode, msg, ...) 
({ pr_perror(msg, ##__VA_ARGS__); exit(exitcode); }) + +static int _memfd_create(const char *name, unsigned int flags) +{ + return syscall(SYS_memfd_create, name, flags); +} + + +#ifndef F_LINUX_SPECIFIC_BASE +# define F_LINUX_SPECIFIC_BASE 1024 +#endif + +#ifndef F_ADD_SEALS + #define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) +#endif + +#ifndef F_GET_SEALS + #define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) +#endif + + +#ifndef F_SEAL_SEAL +#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ +#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ +#define F_SEAL_GROW 0x0004 /* prevent file from growing */ +#define F_SEAL_WRITE 0x0008 /* prevent writes */ +#endif + +int main(int argc, char *argv[]) +{ +#define LEN 5 + int fd, fd2; + void *addr_write, *addr_read; + char fdpath[100]; + + test_init(argc, argv); + + fd = _memfd_create("somename", MFD_ALLOW_SEALING | MFD_CLOEXEC); + if (fd < 0) + err(1, "Can't call memfd_create"); + + if (write(fd, "hello", LEN) != LEN) + err(1, "Can't write"); + + if (fcntl(fd, F_ADD_SEALS, F_SEAL_WRITE) < 0) + err(1, "Can't add seals"); + + test_daemon(); + test_waitsig(); + + snprintf(fdpath, sizeof(fdpath), "/proc/self/fd/%d", fd); + fd2 = open(fdpath, O_RDWR); + if (fd2 < 0) + err(1, "Can't open memfd via proc"); + + if (fcntl(fd, F_GET_SEALS) != F_SEAL_WRITE) { + fail("Seals are different"); + return 1; + } + + addr_write = mmap(NULL, LEN, PROT_WRITE, MAP_SHARED, fd2, 0); + if (addr_write != MAP_FAILED) { + fail("Should not be able to get write access"); + return 1; + } + + addr_read = mmap(NULL, 1, PROT_READ, MAP_PRIVATE, fd2, 0); + if (addr_read == MAP_FAILED) + err(1, "Can't mmap"); + + if (memcmp(addr_read, "hello", LEN)) { + fail("Mapping has bad data"); + return 1; + } + + pass(); + + return 0; +} From 9049d7b82b60ef744ff134baa0af94be0f7d957e Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 23 Jan 2020 16:39:28 +0000 Subject: [PATCH 215/277] criu: Use strlcpy() instead of strncpy() gcc8 in 
Fedora Rawhide has a new useful warning: > criu/img-remote.c: In function 'push_snapshot_id': > criu/img-remote.c:1099:2: error: 'strncpy' specified bound 4096 equals destination size [-Werror=stringop-truncation] > 1099 | strncpy(rn.snapshot_id, snapshot_id, PATH_MAX); > | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From man 3 strncpy: > Warning: If there is no null byte among the first n bytes of src, > the string placed in dest will not be null-terminated. Signed-off-by: Dmitry Safonov --- criu/files-reg.c | 3 ++- criu/files.c | 4 ++-- criu/img-remote.c | 3 ++- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/criu/files-reg.c b/criu/files-reg.c index e07bf4ad18..9547574776 100644 --- a/criu/files-reg.c +++ b/criu/files-reg.c @@ -33,6 +33,7 @@ #include "namespaces.h" #include "proc_parse.h" #include "pstree.h" +#include "string.h" #include "fault-injection.h" #include "external.h" #include "memfd.h" @@ -498,7 +499,7 @@ static int open_remap_ghost(struct reg_file_info *rfi, gf->remap.rmnt_id = rfi->rfe->mnt_id; if (S_ISDIR(gfe->mode)) - strncpy(gf->remap.rpath, rfi->path, PATH_MAX); + strlcpy(gf->remap.rpath, rfi->path, PATH_MAX); else ghost_path(gf->remap.rpath, PATH_MAX, rfi, rpe); diff --git a/criu/files.c b/criu/files.c index 789b08a4cd..f7963bf547 100644 --- a/criu/files.c +++ b/criu/files.c @@ -45,6 +45,7 @@ #include "autofs.h" #include "parasite.h" #include "parasite-syscall.h" +#include "string.h" #include "kerndat.h" #include "fdstore.h" @@ -291,8 +292,7 @@ static int fixup_overlayfs(struct fd_parms *p, struct fd_link *link) char buf[PATH_MAX]; int n; - strncpy(buf, link->name, PATH_MAX); - buf[PATH_MAX - 1] = 0; + strlcpy(buf, link->name, PATH_MAX); n = snprintf(link->name, PATH_MAX, "%s/%s", m->mountpoint, buf + 2); if (n >= PATH_MAX) { pr_err("Not enough space to replace %s\n", buf); diff --git a/criu/img-remote.c b/criu/img-remote.c index 1160ba0f1d..f9464e011d 100644 --- a/criu/img-remote.c +++ b/criu/img-remote.c @@ -10,6 +10,7 @@ 
#include "images/remote-image.pb-c.h" #include "protobuf.h" #include "servicefd.h" +#include "string.h" #include "xmalloc.h" #define EPOLL_MAX_EVENTS 50 @@ -1096,7 +1097,7 @@ int push_snapshot_id(void) close(sockfd); return -1; } - strncpy(rn.snapshot_id, snapshot_id, PATH_MAX); + strlcpy(rn.snapshot_id, snapshot_id, PATH_MAX); n = pb_write_obj(sockfd, &rn, PB_SNAPSHOT_ID); From 6875c9acebba4575a78d892c428b6f8846e4e809 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 6 Feb 2020 18:01:00 +0000 Subject: [PATCH 216/277] criu: Make use strlcpy() to copy into allocated strings strncpy() with n == strlen(src) won't put NULL-terminator in dst. Signed-off-by: Dmitry Safonov --- criu/cr-restore.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 76b2a81159..e630e74b7e 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -77,6 +77,7 @@ #include "sigframe.h" #include "fdstore.h" #include "memfd.h" +#include "string.h" #include "parasite-syscall.h" #include "files-reg.h" @@ -3154,7 +3155,7 @@ rst_prep_creds_args(CredsEntry *ce, unsigned long *prev_pos) args = rst_mem_remap_ptr(this_pos, RM_PRIVATE); args->lsm_profile = lsm_profile; - strncpy(args->lsm_profile, rendered, lsm_profile_len); + strlcpy(args->lsm_profile, rendered, lsm_profile_len + 1); xfree(rendered); } } else { @@ -3188,7 +3189,7 @@ rst_prep_creds_args(CredsEntry *ce, unsigned long *prev_pos) args = rst_mem_remap_ptr(this_pos, RM_PRIVATE); args->lsm_sockcreate = lsm_sockcreate; - strncpy(args->lsm_sockcreate, rendered, lsm_sockcreate_len); + strlcpy(args->lsm_sockcreate, rendered, lsm_sockcreate_len + 1); xfree(rendered); } } else { From 7491326bd5f202cac31227cacbe5f6085273fb97 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 24 Jan 2020 11:55:00 +0000 Subject: [PATCH 217/277] zdtm: Make test_{doc,author} weak variables Allows to override them in every test, optionally. 
Signed-off-by: Dmitry Safonov --- test/zdtm/lib/parseargs.c | 4 ++-- test/zdtm/lib/zdtmtst.h | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/test/zdtm/lib/parseargs.c b/test/zdtm/lib/parseargs.c index 7e411f6b6c..d8aa4ed639 100644 --- a/test/zdtm/lib/parseargs.c +++ b/test/zdtm/lib/parseargs.c @@ -113,8 +113,8 @@ static void helpexit(void) exit(1); } -const char *test_doc; -const char *test_author; +const char __attribute__((weak)) *test_doc; +const char __attribute__((weak)) *test_author; static void prdoc(void) { diff --git a/test/zdtm/lib/zdtmtst.h b/test/zdtm/lib/zdtmtst.h index 1fbf795bf8..2cd4bdd1dd 100644 --- a/test/zdtm/lib/zdtmtst.h +++ b/test/zdtm/lib/zdtmtst.h @@ -155,6 +155,9 @@ struct zdtm_tcp_opts { int flags; }; +extern const char *test_author; +extern const char *test_doc; + extern int tcp_init_server_with_opts(int family, int *port, struct zdtm_tcp_opts *opts); extern pid_t sys_clone_unified(unsigned long flags, void *child_stack, void *parent_tid, void *child_tid, unsigned long newtls); From a3501c0af9fcd14c41005095dbae57b82a02f6d3 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Tue, 21 Jan 2020 12:31:00 +0000 Subject: [PATCH 218/277] vdso: Add vdso_is_present() helper Use it in kerndat to check if the kernel provides vDSO. 
Signed-off-by: Dmitry Safonov --- criu/include/util-vdso.h | 5 +++++ criu/vdso.c | 10 ++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/criu/include/util-vdso.h b/criu/include/util-vdso.h index 33b7411dee..046cd96d7e 100644 --- a/criu/include/util-vdso.h +++ b/criu/include/util-vdso.h @@ -41,6 +41,11 @@ struct vdso_maps { bool compatible; }; +static inline bool vdso_is_present(struct vdso_maps *m) +{ + return m->vdso_start != VDSO_BAD_ADDR; +} + #define VDSO_SYMBOL_INIT { .offset = VDSO_BAD_ADDR, } #define VDSO_SYMTABLE_INIT \ diff --git a/criu/vdso.c b/criu/vdso.c index 50b8b8dba5..b8df2d7a65 100644 --- a/criu/vdso.c +++ b/criu/vdso.c @@ -611,6 +611,12 @@ int kerndat_vdso_fill_symtable(void) return -1; } + if (!vdso_is_present(&vdso_maps)) { + pr_debug("Kernel doesn't premap vDSO - probably CONFIG_VDSO is not set\n"); + kdat.vdso_sym = vdso_maps.sym; + return 0; + } + if (vdso_fill_self_symtable(&vdso_maps)) { pr_err("Failed to fill self vdso symtable\n"); return -1; @@ -643,7 +649,7 @@ int kerndat_vdso_preserves_hint(void) kdat.vdso_hint_reliable = 0; - if (vdso_maps.vdso_start == VDSO_BAD_ADDR) + if (!vdso_is_present(&vdso_maps)) return 0; child = fork(); @@ -693,7 +699,7 @@ int kerndat_vdso_preserves_hint(void) goto out_kill; } - if (vdso_maps_after.vdso_start != VDSO_BAD_ADDR) + if (vdso_is_present(&vdso_maps_after)) kdat.vdso_hint_reliable = 1; ret = 0; From fa292e4c5a8a2b41abc2fb064ba6d133b531bcfd Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Wed, 22 Jan 2020 14:00:27 +0000 Subject: [PATCH 219/277] vdso: Repair !CONFIG_VDSO Apparently, C/R is broken when CONFIG_VDSO is not set. Probably, I've broken it while adding arm vdso support. Or maybe some commits after. Repair it by adding checks into vdso_init_dump(), vdso_init_restore(). Also, don't try handling vDSO in restorer if it wasn't present in parent. And prevent summing VDSO_BAD_SIZE to {vdso,vvar}_rt_size. 
Reported-by: Adrian Reber Signed-off-by: Dmitry Safonov --- criu/cr-restore.c | 9 +++++--- criu/pie/parasite-vdso.c | 12 ++++++++++ criu/pie/restorer.c | 2 +- criu/vdso.c | 48 ++++++++++++++++++++++++++-------------- 4 files changed, 50 insertions(+), 21 deletions(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index e630e74b7e..c9dd953927 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -3385,10 +3385,13 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns vdso_maps_rt = vdso_maps; /* * Figure out how much memory runtime vdso and vvar will need. + * Check if vDSO or VVAR is not provided by kernel. */ - vdso_rt_size = vdso_maps_rt.sym.vdso_size; - if (vdso_rt_size && vdso_maps_rt.sym.vvar_size) - vdso_rt_size += ALIGN(vdso_maps_rt.sym.vvar_size, PAGE_SIZE); + if (vdso_maps_rt.sym.vdso_size != VDSO_BAD_SIZE) { + vdso_rt_size = vdso_maps_rt.sym.vdso_size; + if (vdso_maps_rt.sym.vvar_size != VVAR_BAD_SIZE) + vdso_rt_size += ALIGN(vdso_maps_rt.sym.vvar_size, PAGE_SIZE); + } task_args->bootstrap_len += vdso_rt_size; /* diff --git a/criu/pie/parasite-vdso.c b/criu/pie/parasite-vdso.c index 3a1684d353..3f5cb14312 100644 --- a/criu/pie/parasite-vdso.c +++ b/criu/pie/parasite-vdso.c @@ -292,6 +292,18 @@ int vdso_proxify(struct vdso_maps *rt, bool *added_proxy, return -1; } + /* + * We could still do something about it here.. + * 1. Hope that vDSO from images still works (might not be the case). + * 2. Try to map vDSO. + * But, hopefully no one intends to migrate application that uses + * vDSO to a dut where kernel doesn't provide it. + */ + if (!vdso_is_present(rt)) { + pr_err("vDSO isn't provided by kernel, but exists in images\n"); + return -1; + } + /* * vDSO mark overwrites Elf program header of proxy vDSO thus * it must never ever be greater in size. 
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c index 7012b88a15..afe185f048 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -1454,7 +1454,7 @@ long __export_restore_task(struct task_restore_args *args) * it's presence in original task: vdso will be used for fast * getttimeofday() in restorer's log timings. */ - if (!args->can_map_vdso) { + if (!args->can_map_vdso && vdso_is_present(&args->vdso_maps_rt)) { /* It's already checked in kdat, but let's check again */ if (args->compatible_mode) { pr_err("Compatible mode without vdso map support\n"); diff --git a/criu/vdso.c b/criu/vdso.c index b8df2d7a65..19ba4765df 100644 --- a/criu/vdso.c +++ b/criu/vdso.c @@ -275,6 +275,10 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid, struct vma_area *vma; int fd = -1; + /* vDSO is not provided by kernel */ + if (kdat.vdso_sym.vdso_size == VDSO_BAD_SIZE) + return 0; + vcheck = get_vdso_check_type(ctl); if (vcheck == VDSO_CHECK_PFN) { BUG_ON(vdso_pfn == VDSO_BAD_PFN); @@ -534,21 +538,6 @@ static int vdso_fill_compat_symtable(struct vdso_maps *native, } #endif /* CONFIG_COMPAT */ -int vdso_init_dump(void) -{ - if (vdso_parse_maps(PROC_SELF, &vdso_maps)) { - pr_err("Failed reading self/maps for filling vdso/vvar bounds\n"); - return -1; - } - - if (kdat.pmap != PM_FULL) - pr_info("VDSO detection turned off\n"); - else if (vaddr_to_pfn(-1, vdso_maps.vdso_start, &vdso_pfn)) - return -1; - - return 0; -} - /* * Check vdso/vvar sized read from maps to kdat values. 
* We do not read /proc/self/maps for compatible vdso as it's @@ -566,11 +555,36 @@ static int is_kdat_vdso_sym_valid(void) return true; } -int vdso_init_restore(void) +int vdso_init_dump(void) { + if (vdso_parse_maps(PROC_SELF, &vdso_maps)) { + pr_err("Failed reading self/maps for filling vdso/vvar bounds\n"); + return -1; + } + + if (!is_kdat_vdso_sym_valid()) { + pr_err("Kdat sizes of vdso/vvar differ to maps file \n"); + return -1; + } + if (kdat.vdso_sym.vdso_size == VDSO_BAD_SIZE) { - pr_err("Kdat has empty vdso symtable\n"); + pr_debug("Kdat has empty vdso symtable - probably CONFIG_VDSO is not set\n"); + return 0; + } + + if (kdat.pmap != PM_FULL) + pr_info("VDSO detection turned off\n"); + else if (vaddr_to_pfn(-1, vdso_maps.vdso_start, &vdso_pfn)) return -1; + + return 0; +} + +int vdso_init_restore(void) +{ + if (kdat.vdso_sym.vdso_size == VDSO_BAD_SIZE) { + pr_debug("Kdat has empty vdso symtable - probably CONFIG_VDSO is not set\n"); + return 0; } /* Already filled vdso_maps during kdat test */ From 540ab31e1df3ae6c69cc03df970f12b80afe738a Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Wed, 22 Jan 2020 14:05:47 +0000 Subject: [PATCH 220/277] vdso: Don't page-align vvar It's always page-aligned (as any VMA). 
Signed-off-by: Dmitry Safonov --- criu/cr-restore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index c9dd953927..fa12db98e3 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -3390,7 +3390,7 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns if (vdso_maps_rt.sym.vdso_size != VDSO_BAD_SIZE) { vdso_rt_size = vdso_maps_rt.sym.vdso_size; if (vdso_maps_rt.sym.vvar_size != VVAR_BAD_SIZE) - vdso_rt_size += ALIGN(vdso_maps_rt.sym.vvar_size, PAGE_SIZE); + vdso_rt_size += vdso_maps_rt.sym.vvar_size; } task_args->bootstrap_len += vdso_rt_size; From 7150497b271b8e249d0da4f264bd0fd1e73e00c6 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sat, 18 Jan 2020 14:28:03 +0000 Subject: [PATCH 221/277] zdtm: mntns_rw_ro_rw update error msg Signed-off-by: Radostin Stoyanov --- test/zdtm/static/mntns_rw_ro_rw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/zdtm/static/mntns_rw_ro_rw.c b/test/zdtm/static/mntns_rw_ro_rw.c index 7aed254b69..6179c47882 100644 --- a/test/zdtm/static/mntns_rw_ro_rw.c +++ b/test/zdtm/static/mntns_rw_ro_rw.c @@ -31,12 +31,12 @@ int main(int argc, char **argv) test_waitsig(); if (access("/proc/sys/net/ipv4/ip_forward", W_OK)) { - fail("Unable to access /proc/sys/net/core/wmem_max"); + fail("Unable to access /proc/sys/net/ipv4/ip_forward"); return 1; } if (access("/proc/sys/kernel/ns_last_pid", W_OK) != -1 || errno != EROFS) { - fail("Unable to access /proc/sys/kernel/pid_max"); + fail("Unable to access /proc/sys/kernel/ns_last_pid"); return 1; } From 43a7b34094ddda2a5f60dc4902a55a1d73f6f6ec Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Sat, 8 Feb 2020 19:43:39 +0100 Subject: [PATCH 222/277] Travis: fix podman test case Podman changed the output of 'podman ps'. For the test only running containers are interesting. Adding the filter '-f status=running' only returns running containers as previously. 
Signed-off-by: Adrian Reber --- scripts/travis/podman-test.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/scripts/travis/podman-test.sh b/scripts/travis/podman-test.sh index 825bca746e..7490d5fe9a 100755 --- a/scripts/travis/podman-test.sh +++ b/scripts/travis/podman-test.sh @@ -39,12 +39,12 @@ for i in `seq 20`; do echo "Test $i for podman container checkpoint" podman exec cr ps axf podman logs cr - [ `podman ps -f name=cr -q | wc -l` -eq "1" ] + [ `podman ps -f name=cr -q -f status=running | wc -l` -eq "1" ] podman container checkpoint cr - [ `podman ps -f name=cr -q | wc -l` -eq "0" ] + [ `podman ps -f name=cr -q -f status=running | wc -l` -eq "0" ] podman ps -a podman container restore cr - [ `podman ps -f name=cr -q | wc -l` -eq "1" ] + [ `podman ps -f name=cr -q -f status=running | wc -l` -eq "1" ] podman logs cr done @@ -53,16 +53,16 @@ for i in `seq 20`; do podman ps -a podman exec cr ps axf podman logs cr - [ `podman ps -f name=cr -q | wc -l` -eq "1" ] + [ `podman ps -f name=cr -q -f status=running | wc -l` -eq "1" ] podman container checkpoint -l --export /tmp/chkpt.tar.gz - [ `podman ps -f name=cr -q | wc -l` -eq "0" ] + [ `podman ps -f name=cr -q -f status=running | wc -l` -eq "0" ] podman ps -a podman rm -fa podman ps -a podman container restore --import /tmp/chkpt.tar.gz - [ `podman ps -f name=cr -q | wc -l` -eq "1" ] + [ `podman ps -f name=cr -q -f status=running | wc -l` -eq "1" ] podman container restore --name cr2 --import /tmp/chkpt.tar.gz - [ `podman ps -f name=cr2 -q | wc -l` -eq "1" ] + [ `podman ps -f name=cr2 -q -f status=running | wc -l` -eq "1" ] podman ps -a podman logs cr podman logs cr2 @@ -70,7 +70,7 @@ for i in `seq 20`; do podman rm -fa podman ps -a podman container restore --import /tmp/chkpt.tar.gz - [ `podman ps -f name=cr -q | wc -l` -eq "1" ] + [ `podman ps -f name=cr -q -f status=running | wc -l` -eq "1" ] podman ps -a rm -f /tmp/chkpt.tar.gz done From 
b5e857b10cabfa770760e8b41490907c592c1634 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 6 Feb 2020 20:46:17 -0800 Subject: [PATCH 223/277] test/zdtm/inhfd: update dump options one each iteration This allows to run inhfd tests with many iterations of C/R. Signed-off-by: Andrei Vagin --- test/zdtm.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/zdtm.py b/test/zdtm.py index c154740d18..1a89466871 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -762,6 +762,11 @@ def getropts(self): fcntl.fcntl(fd, fcntl.F_SETFD, fdflags) peer_file_name = self.__peer_file_names[i] ropts.extend(["--inherit-fd", "fd[%d]:%s" % (fd, peer_file_name)]) + self.__peer_file_names = [] + self.__dump_opts = [] + for _, peer_file in self.__files: + self.__peer_file_names.append(self.__fdtyp.filename(peer_file)) + self.__dump_opts += self.__fdtyp.dump_opts(peer_file) return ropts def print_output(self): From a6083ea220fee29e20799e91bb6ec8fd153ece8f Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 6 Feb 2020 21:20:20 -0800 Subject: [PATCH 224/277] python: sort imports 202 Additional newline in a group of imports. I100 Import statements are in the wrong order. 
Signed-off-by: Andrei Vagin --- lib/py/images/pb2dict.py | 13 +++++++------ test/inhfd/memfd.py | 2 +- test/inhfd/socket.py | 2 +- test/others/rpc/config_file.py | 7 ++++--- test/zdtm.py | 32 +++++++++++++++++--------------- 5 files changed, 30 insertions(+), 26 deletions(-) diff --git a/lib/py/images/pb2dict.py b/lib/py/images/pb2dict.py index 0cf80aa3ae..40a6036cf4 100644 --- a/lib/py/images/pb2dict.py +++ b/lib/py/images/pb2dict.py @@ -1,12 +1,13 @@ -from google.protobuf.descriptor import FieldDescriptor as FD -import opts_pb2 -from ipaddress import IPv4Address, ip_address -from ipaddress import IPv6Address -import socket +import base64 import collections import os -import base64 import quopri +import socket +from ipaddress import IPv4Address, IPv6Address, ip_address + +from google.protobuf.descriptor import FieldDescriptor as FD + +import opts_pb2 if "encodebytes" not in dir(base64): base64.encodebytes = base64.encodestring diff --git a/test/inhfd/memfd.py b/test/inhfd/memfd.py index d9ce01e417..b06e35068f 100755 --- a/test/inhfd/memfd.py +++ b/test/inhfd/memfd.py @@ -1,5 +1,5 @@ -import os import ctypes +import os libc = ctypes.CDLL(None) diff --git a/test/inhfd/socket.py b/test/inhfd/socket.py index 9cea16ffb9..7efe7faab5 100755 --- a/test/inhfd/socket.py +++ b/test/inhfd/socket.py @@ -1,5 +1,5 @@ -import socket import os +import socket def create_fds(): diff --git a/test/others/rpc/config_file.py b/test/others/rpc/config_file.py index 7b07bc145f..90c80fcaea 100755 --- a/test/others/rpc/config_file.py +++ b/test/others/rpc/config_file.py @@ -1,11 +1,12 @@ #!/usr/bin/python +import argparse import os import sys -import rpc_pb2 as rpc -import argparse -from tempfile import mkstemp import time +from tempfile import mkstemp + +import rpc_pb2 as rpc from setup_swrk import setup_swrk diff --git a/test/zdtm.py b/test/zdtm.py index 1a89466871..cac216411d 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -1,31 +1,33 @@ #!/usr/bin/env python # vim: noet ts=8 sw=8 sts=8 
from __future__ import absolute_import, division, print_function, unicode_literals -from builtins import (str, open, range, zip, int, input) import argparse +import atexit +import datetime +import errno +import fcntl import glob +import linecache +import mmap import os -import subprocess -import time -import tempfile -import shutil +import random import re -import stat +import shutil import signal -import atexit -import sys -import linecache -import random +import stat import string -import fcntl -import errno -import datetime -import yaml import struct -import mmap +import subprocess +import sys +import tempfile +import time +from builtins import (input, int, open, range, str, zip) + import pycriu as crpc +import yaml + os.chdir(os.path.dirname(os.path.abspath(__file__))) prev_line = None From 285e6893dbf54d9cdeff440458225c0a103d4b14 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Fri, 21 Feb 2020 12:14:38 +0300 Subject: [PATCH 225/277] test/jenkins: remove empty line at the end of file Signed-off-by: Pavel Tikhomirov --- test/jenkins/criu-fault.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/test/jenkins/criu-fault.sh b/test/jenkins/criu-fault.sh index 4e3790e59c..c27dd37389 100755 --- a/test/jenkins/criu-fault.sh +++ b/test/jenkins/criu-fault.sh @@ -27,4 +27,3 @@ fi ./test/zdtm.py run -t zdtm/static/maps04 --fault 131 --keep-going --report report --pre 2:1 || fail ./test/zdtm.py run -t zdtm/transition/maps008 --fault 131 --keep-going --report report --pre 2:1 || fail ./test/zdtm.py run -t zdtm/static/maps01 --fault 132 -f h || fail - From a5f6158faa8160f18ca2a8d58953a0e67dc06d66 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 26 Feb 2020 12:25:37 +0200 Subject: [PATCH 226/277] uffd: use userns_call() to execute ioctl(UFFDIO_API) In the recent kernels the userfaultfd support for FORK events is limited to CAP_SYS_PTRACE. 
That causes the following error when the ioctl(UFFDIO_API) is executed from non-privileged userns: Error (criu/uffd.c:273): uffd: Failed to get uffd API: Operation not permitted Wrapping the call to ioctl(UFFDIO_API) in userns_call() resolves the issue. Fixes: #964 Signed-off-by: Mike Rapoport --- criu/uffd.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/criu/uffd.c b/criu/uffd.c index c47b35b1f8..99373c04de 100644 --- a/criu/uffd.c +++ b/criu/uffd.c @@ -40,6 +40,7 @@ #include "tls.h" #include "fdstore.h" #include "util.h" +#include "namespaces.h" #undef LOG_PREFIX #define LOG_PREFIX "uffd: " @@ -254,6 +255,13 @@ bool uffd_noncooperative(void) return (kdat.uffd_features & features) == features; } +static int uffd_api_ioctl(void *arg, int fd, pid_t pid) +{ + struct uffdio_api *uffdio_api = arg; + + return ioctl(fd, UFFDIO_API, uffdio_api); +} + int uffd_open(int flags, unsigned long *features) { struct uffdio_api uffdio_api = { 0 }; @@ -269,7 +277,8 @@ int uffd_open(int flags, unsigned long *features) if (features) uffdio_api.features = *features; - if (ioctl(uffd, UFFDIO_API, &uffdio_api)) { + if (userns_call(uffd_api_ioctl, 0, &uffdio_api, sizeof(uffdio_api), + uffd)) { pr_perror("Failed to get uffd API"); goto err; } From 832ab07f570c3338687b42ec6489be187e21fca3 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Tue, 18 Feb 2020 19:45:08 +0000 Subject: [PATCH 227/277] criu(8): Add documentation for --enable-fs This option was introduced with: https://github.com/checkpoint-restore/criu/commit/e2c38245c613df5e36dcf0253c7652f928e46abf v2: (comment from Pavel Tikhomirov) --enable-fs does not fit with --external dev[]:, see try_resolve_ext_mount, external dev mounts only determined for FSTYPE__UNSUPPORTED. 
Signed-off-by: Radostin Stoyanov --- Documentation/criu.txt | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Documentation/criu.txt b/Documentation/criu.txt index a6b9f7fae2..0ac29103a1 100644 --- a/Documentation/criu.txt +++ b/Documentation/criu.txt @@ -90,6 +90,19 @@ The following levels are available: *-L*, *--libdir* 'path':: Path to plugins directory. +*--enable-fs* ['fs'[,'fs'...]]:: + Specify a comma-separated list of filesystem names that should + be auto-detected. The value 'all' enables auto-detection for + all filesystems. ++ +Note: This option is not safe, use at your own risk. +Auto-detecting a filesystem mount assumes that the mountpoint can +be restored with *mount(src, mountpoint, flags, options)*. When used, +*dump* is expected to always succeed if a mountpoint is to be +auto-detected, however *restore* may fail (or do something wrong) +if the assumption for restore logic is incorrect. This option is +not compatable with *--external* *dev*. + *--action-script* 'script':: Add an external action script to be executed at certain stages. The environment variable *CRTOOLS_SCRIPT_ACTION* is available From 30b07fccd317d7fccacecc8d40d0c18949d747d4 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Tue, 18 Feb 2020 19:53:36 +0000 Subject: [PATCH 228/277] criu(8): Convert tabs to spaces Signed-off-by: Radostin Stoyanov --- Documentation/criu.txt | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Documentation/criu.txt b/Documentation/criu.txt index 0ac29103a1..ab63e461c7 100644 --- a/Documentation/criu.txt +++ b/Documentation/criu.txt @@ -477,7 +477,7 @@ The 'mode' may be one of the following: *soft*::: Restore cgroup properties if only cgroup has been created by *criu*, otherwise do not restore properties. This is the - default if mode is unspecified. + default if mode is unspecified. *full*::: Always restore all cgroups and their properties. 
@@ -575,17 +575,17 @@ check* always checks Category 1 features unless *--feature* is specified which only checks a specified feature. *Category 1*::: Absolutely required. These are features like support for - */proc/PID/map_files*, *NETLINK_SOCK_DIAG* socket - monitoring, */proc/sys/kernel/ns_last_pid* etc. + */proc/PID/map_files*, *NETLINK_SOCK_DIAG* socket + monitoring, */proc/sys/kernel/ns_last_pid* etc. *Category 2*::: Required only for specific cases. These are features - like AIO remap, */dev/net/tun* and others that are only - required if a process being dumped or restored - is using those. + like AIO remap, */dev/net/tun* and others that are only + required if a process being dumped or restored + is using those. *Category 3*::: Experimental. These are features like *task-diag* that - are used for experimental purposes (mostly - during development). + are used for experimental purposes (mostly + during development). If there are no errors or warnings, *criu* prints "Looks good." and its exit code is 0. From 4c27b3db4f4325a311d8bfa9a50ea3efb4d6e377 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 4 Feb 2020 09:10:49 +0000 Subject: [PATCH 229/277] seize: prepare for cgroupv2 freezer The cgroupv2 freezer does not return the same strings as v1. Instead of THAWED and FROZEN v2 returns 0 and 1 (strings). This prepares the seize code to use 0 and 1 everywhere and THAWED and FROZEN only for v1 specific code paths. 
Signed-off-by: Adrian Reber --- criu/seize.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/criu/seize.c b/criu/seize.c index fd314666f0..14cd824172 100644 --- a/criu/seize.c +++ b/criu/seize.c @@ -30,7 +30,14 @@ static const char frozen[] = "FROZEN"; static const char freezing[] = "FREEZING"; static const char thawed[] = "THAWED"; -static const char *get_freezer_state(int fd) +enum freezer_state { + FREEZER_ERROR = -1, + THAWED, + FROZEN, + FREEZING +}; + +static enum freezer_state get_freezer_state(int fd) { char state[32]; int ret; @@ -52,15 +59,15 @@ static const char *get_freezer_state(int fd) pr_debug("freezer.state=%s\n", state); if (strcmp(state, frozen) == 0) - return frozen; + return FROZEN; else if (strcmp(state, freezing) == 0) - return freezing; + return FREEZING; else if (strcmp(state, thawed) == 0) - return thawed; + return THAWED; pr_err("Unknown freezer state: %s\n", state); err: - return NULL; + return FREEZER_ERROR; } static bool freezer_thawed; @@ -98,7 +105,7 @@ static int freezer_restore_state(void) static int processes_to_wait; static pid_t *processes_to_wait_pids; -static int seize_cgroup_tree(char *root_path, const char *state) +static int seize_cgroup_tree(char *root_path, enum freezer_state state) { DIR *dir; struct dirent *de; @@ -134,7 +141,7 @@ static int seize_cgroup_tree(char *root_path, const char *state) if (!compel_interrupt_task(pid)) { pr_debug("SEIZE %d: success\n", pid); processes_to_wait++; - } else if (state == frozen) { + } else if (state == FROZEN) { char buf[] = "/proc/XXXXXXXXXX/exe"; struct stat st; @@ -332,7 +339,7 @@ static int freeze_processes(void) { int fd, exit_code = -1; char path[PATH_MAX]; - const char *state = thawed; + enum freezer_state state = THAWED; static const unsigned long step_ms = 100; unsigned long nr_attempts = (opts.timeout * 1000000) / step_ms; @@ -361,11 +368,11 @@ static int freeze_processes(void) return -1; } state = get_freezer_state(fd); - 
if (!state) { + if (state == FREEZER_ERROR) { close(fd); return -1; } - if (state == thawed) { + if (state == THAWED) { freezer_thawed = true; lseek(fd, 0, SEEK_SET); @@ -384,12 +391,12 @@ static int freeze_processes(void) */ for (; i <= nr_attempts; i++) { state = get_freezer_state(fd); - if (!state) { + if (state == FREEZER_ERROR) { close(fd); return -1; } - if (state == frozen) + if (state == FROZEN) break; if (alarm_timeouted()) goto err; From aac41164b2cd7f0d2047f207b32844524682e43f Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 4 Feb 2020 09:53:51 +0000 Subject: [PATCH 230/277] seize: factor out opening and writing the freezer state More preparations for cgroupv2 freezer. Factor out the freezer state opening and writing to have one location where to handle v1 and v2 differences. Signed-off-by: Adrian Reber --- criu/seize.c | 81 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 55 insertions(+), 26 deletions(-) diff --git a/criu/seize.c b/criu/seize.c index 14cd824172..b53707e44a 100644 --- a/criu/seize.c +++ b/criu/seize.c @@ -20,6 +20,7 @@ #include "seccomp.h" #include "seize.h" #include "stats.h" +#include "string.h" #include "xmalloc.h" #include "util.h" #include @@ -77,13 +78,39 @@ const char *get_real_freezer_state(void) return freezer_thawed ? 
thawed : frozen; } -static int freezer_restore_state(void) +static int freezer_write_state(int fd, enum freezer_state new_state) { - int fd; - char path[PATH_MAX]; + char state[32]; + int ret; - if (!opts.freeze_cgroup || freezer_thawed) - return 0; + if (new_state == THAWED) { + if (strlcpy(state, thawed, sizeof(state)) >= sizeof(state)) + return -1; + } else if (new_state == FROZEN) { + if (strlcpy(state, frozen, sizeof(state)) >= sizeof(state)) + return -1; + } else { + return -1; + } + + ret = lseek(fd, 0, SEEK_SET); + if (ret < 0) { + pr_perror("Unable to seek freezer FD"); + return -1; + } + if (write(fd, state, sizeof(state)) != sizeof(state)) { + pr_perror("Unable to %s tasks", + (new_state == THAWED) ? "thaw" : "freeze"); + return -1; + } + + return 0; +} + +static int freezer_open(void) +{ + char path[PATH_MAX]; + int fd; snprintf(path, sizeof(path), "%s/freezer.state", opts.freeze_cgroup); fd = open(path, O_RDWR); @@ -92,13 +119,24 @@ static int freezer_restore_state(void) return -1; } - if (write(fd, frozen, sizeof(frozen)) != sizeof(frozen)) { - pr_perror("Unable to freeze tasks"); - close(fd); + return fd; +} + +static int freezer_restore_state(void) +{ + int fd; + int ret; + + if (!opts.freeze_cgroup || freezer_thawed) + return 0; + + fd = freezer_open(); + if (fd < 0) return -1; - } + + ret = freezer_write_state(fd, FROZEN); close(fd); - return 0; + return ret; } /* A number of tasks in a freezer cgroup which are not going to be dumped */ @@ -338,7 +376,6 @@ static int log_unfrozen_stacks(char *root) static int freeze_processes(void) { int fd, exit_code = -1; - char path[PATH_MAX]; enum freezer_state state = THAWED; static const unsigned long step_ms = 100; @@ -361,12 +398,10 @@ static int freeze_processes(void) pr_debug("freezing processes: %lu attempts with %lu ms steps\n", nr_attempts, step_ms); - snprintf(path, sizeof(path), "%s/freezer.state", opts.freeze_cgroup); - fd = open(path, O_RDWR); - if (fd < 0) { - pr_perror("Unable to open %s", 
path); + fd = freezer_open(); + if (fd < 0) return -1; - } + state = get_freezer_state(fd); if (state == FREEZER_ERROR) { close(fd); @@ -375,9 +410,7 @@ static int freeze_processes(void) if (state == THAWED) { freezer_thawed = true; - lseek(fd, 0, SEEK_SET); - if (write(fd, frozen, sizeof(frozen)) != sizeof(frozen)) { - pr_perror("Unable to freeze tasks"); + if (freezer_write_state(fd, FROZEN)) { close(fd); return -1; } @@ -427,13 +460,9 @@ static int freeze_processes(void) } err: - if (exit_code == 0 || freezer_thawed) { - lseek(fd, 0, SEEK_SET); - if (write(fd, thawed, sizeof(thawed)) != sizeof(thawed)) { - pr_perror("Unable to thaw tasks"); - exit_code = -1; - } - } + if (exit_code == 0 || freezer_thawed) + exit_code = freezer_write_state(fd, THAWED); + if (close(fd)) { pr_perror("Unable to thaw tasks"); return -1; From 6f19249b2565f3f7c0a1f8f65b4ae180e8f7f34b Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 4 Feb 2020 13:38:42 +0000 Subject: [PATCH 231/277] seize: support cgroup v2 freezer This adds support to checkpoint processes using the cgroup v2 freezer. Signed-off-by: Adrian Reber --- criu/seize.c | 150 +++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 133 insertions(+), 17 deletions(-) diff --git a/criu/seize.c b/criu/seize.c index b53707e44a..0ba2d9b1db 100644 --- a/criu/seize.c +++ b/criu/seize.c @@ -38,7 +38,10 @@ enum freezer_state { FREEZING }; -static enum freezer_state get_freezer_state(int fd) +/* Track if we are running on cgroup v2 system. */ +static bool cgroup_v2 = false; + +static enum freezer_state get_freezer_v1_state(int fd) { char state[32]; int ret; @@ -71,6 +74,70 @@ static enum freezer_state get_freezer_state(int fd) return FREEZER_ERROR; } +static enum freezer_state get_freezer_v2_state(int fd) +{ + int exit_code = FREEZER_ERROR; + char path[PATH_MAX]; + FILE *event; + char state; + int ret; + + /* + * cgroupv2 freezer uses cgroup.freeze to control the state. The file + * can return 0 or 1. 
1 means the cgroup is frozen; 0 means it is not + * frozen. Writing 1 to an unfrozen cgroup can freeze it. Freezing can + * take some time and if the cgroup has finished freezing can be + * seen in cgroup.events: frozen 0|1. + */ + + ret = lseek(fd, 0, SEEK_SET); + if (ret < 0) { + pr_perror("Unable to seek freezer FD"); + goto out; + } + ret = read(fd, &state, 1); + if (ret <= 0) { + pr_perror("Unable to read from freezer FD"); + goto out; + } + pr_debug("cgroup.freeze=%c\n", state); + if (state == '0') { + exit_code = THAWED; + goto out; + } + + snprintf(path, sizeof(path), "%s/cgroup.events", opts.freeze_cgroup); + event = fopen(path, "r"); + if (event == NULL) { + pr_perror("Unable to open %s", path); + goto out; + } + while (fgets(path, sizeof(path), event)) { + if (strncmp(path, "frozen", 6) != 0) { + continue; + } else if (strncmp(path, "frozen 0", 8) == 0) { + exit_code = FREEZING; + goto close; + } else if (strncmp(path, "frozen 1", 8) == 0) { + exit_code = FROZEN; + goto close; + } + } + + pr_err("Unknown freezer state: %c\n", state); +close: + fclose(event); +out: + return exit_code; +} + +static enum freezer_state get_freezer_state(int fd) +{ + if (cgroup_v2) + return get_freezer_v2_state(fd); + return get_freezer_v1_state(fd); +} + static bool freezer_thawed; const char *get_real_freezer_state(void) @@ -80,15 +147,23 @@ const char *get_real_freezer_state(void) static int freezer_write_state(int fd, enum freezer_state new_state) { - char state[32]; + char state[32] = {0}; int ret; if (new_state == THAWED) { - if (strlcpy(state, thawed, sizeof(state)) >= sizeof(state)) - return -1; + if (cgroup_v2) + state[0] = '0'; + else + if (strlcpy(state, thawed, sizeof(state)) >= + sizeof(state)) + return -1; } else if (new_state == FROZEN) { - if (strlcpy(state, frozen, sizeof(state)) >= sizeof(state)) - return -1; + if (cgroup_v2) + state[0] = '1'; + else + if (strlcpy(state, frozen, sizeof(state)) >= + sizeof(state)) + return -1; } else { return -1; } @@ -109,10 
+184,13 @@ static int freezer_write_state(int fd, enum freezer_state new_state) static int freezer_open(void) { + const char freezer_v1[] = "freezer.state"; + const char freezer_v2[] = "cgroup.freeze"; char path[PATH_MAX]; int fd; - snprintf(path, sizeof(path), "%s/freezer.state", opts.freeze_cgroup); + snprintf(path, sizeof(path), "%s/%s", opts.freeze_cgroup, + cgroup_v2 ? freezer_v2 : freezer_v1); fd = open(path, O_RDWR); if (fd < 0) { pr_perror("Unable to open %s", path); @@ -139,6 +217,22 @@ static int freezer_restore_state(void) return ret; } +static FILE *freezer_open_thread_list(char *root_path) +{ + char path[PATH_MAX]; + FILE *f; + + snprintf(path, sizeof(path), "%s/%s", root_path, + cgroup_v2 ? "cgroup.threads" : "tasks"); + f = fopen(path, "r"); + if (f == NULL) { + pr_perror("Unable to open %s", path); + return NULL; + } + + return f; +} + /* A number of tasks in a freezer cgroup which are not going to be dumped */ static int processes_to_wait; static pid_t *processes_to_wait_pids; @@ -154,12 +248,10 @@ static int seize_cgroup_tree(char *root_path, enum freezer_state state) * New tasks can appear while a freezer state isn't * frozen, so we need to catch all new tasks. 
*/ - snprintf(path, sizeof(path), "%s/tasks", root_path); - f = fopen(path, "r"); - if (f == NULL) { - pr_perror("Unable to open %s", path); + f = freezer_open_thread_list(root_path); + if (f == NULL) return -1; - } + while (fgets(path, sizeof(path), f)) { pid_t pid; int ret; @@ -306,12 +398,10 @@ static int log_unfrozen_stacks(char *root) char path[PATH_MAX]; FILE *f; - snprintf(path, sizeof(path), "%s/tasks", root); - f = fopen(path, "r"); - if (f == NULL) { - pr_perror("Unable to open %s", path); + f = freezer_open_thread_list(root); + if (f == NULL) return -1; - } + while (fgets(path, sizeof(path), f)) { pid_t pid; int ret, stack; @@ -820,6 +910,27 @@ static int collect_task(struct pstree_item *item) return -1; } +static int cgroup_version(void) +{ + char path[PATH_MAX]; + + snprintf(path, sizeof(path), "%s/freezer.state", opts.freeze_cgroup); + if (access(path, F_OK) == 0) { + cgroup_v2 = false; + return 0; + } + + snprintf(path, sizeof(path), "%s/cgroup.freeze", opts.freeze_cgroup); + if (access(path, F_OK) == 0) { + cgroup_v2 = true; + return 0; + } + + pr_err("Neither a cgroupv1 (freezer.state) or cgroupv2 (cgroup.freeze) control file found.\n"); + + return -1; +} + int collect_pstree(void) { pid_t pid = root_item->pid->real; @@ -835,6 +946,11 @@ int collect_pstree(void) */ alarm(opts.timeout); + if (opts.freeze_cgroup && cgroup_version()) + goto err; + + pr_debug("Detected cgroup V%d freezer\n", cgroup_v2 ? 2 : 1); + if (opts.freeze_cgroup && freeze_processes()) goto err; From 378337a496ca759848180bc5411e4446298c5e4e Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 5 Feb 2020 09:39:32 +0000 Subject: [PATCH 232/277] cgroup2: add minimal cgroup2 support The runc test cases are (sometimes) mounting a cgroup inside of the container. For these tests to succeed, let CRIU know that cgroup2 exists and how to restore such a mount. This does not fix any specific cgroup2 settings, it just enables CRIU to mount cgroup2 in the restored container. 
Signed-off-by: Adrian Reber --- criu/filesystems.c | 5 +++++ images/mnt.proto | 2 ++ 2 files changed, 7 insertions(+) diff --git a/criu/filesystems.c b/criu/filesystems.c index 1e4550b371..d76b182918 100644 --- a/criu/filesystems.c +++ b/criu/filesystems.c @@ -747,6 +747,11 @@ static struct fstype fstypes[] = { .code = FSTYPE__CGROUP, .parse = cgroup_parse, .sb_equal = cgroup_sb_equal, + }, { + .name = "cgroup2", + .code = FSTYPE__CGROUP2, + .parse = cgroup_parse, + .sb_equal = cgroup_sb_equal, }, { .name = "aufs", .code = FSTYPE__AUFS, diff --git a/images/mnt.proto b/images/mnt.proto index 4160acbf62..8983395aea 100644 --- a/images/mnt.proto +++ b/images/mnt.proto @@ -28,6 +28,8 @@ enum fstype { // RPC_PIPEFS = 20; // NFS = 21; // NFS4 = 22; + + CGROUP2 = 23; }; message mnt_entry { From 0c4b856f29fc0dfe2bd239cc4711ca23be98520b Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sun, 1 Mar 2020 04:26:12 +0300 Subject: [PATCH 233/277] fs: use __open_proc instead of open("/proc/...", ... ) Processes can run in a mount namespace without /proc. 
Reported-by: Mr Jenkins Signed-off-by: Andrei Vagin --- criu/files-reg.c | 4 +--- criu/memfd.c | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/criu/files-reg.c b/criu/files-reg.c index 9547574776..8e36eaa33e 100644 --- a/criu/files-reg.c +++ b/criu/files-reg.c @@ -819,14 +819,12 @@ static int dump_ghost_file(int _fd, u32 id, const struct stat *st, dev_t phys_de if (S_ISREG(st->st_mode)) { int fd, ret; - char lpath[PSFDS]; /* * Reopen file locally since it may have no read * permissions when drained */ - sprintf(lpath, "/proc/self/fd/%d", _fd); - fd = open(lpath, O_RDONLY); + fd = open_proc(PROC_SELF, "fd/%d", _fd); if (fd < 0) { pr_perror("Can't open ghost original file"); goto err_out; diff --git a/criu/memfd.c b/criu/memfd.c index d17c10fb75..30ccdf22c9 100644 --- a/criu/memfd.c +++ b/criu/memfd.c @@ -299,7 +299,6 @@ static int memfd_open_inode(struct memfd_inode *inode) int memfd_open(struct file_desc *d, u32 *fdflags) { - char lpath[PSFDS]; struct memfd_info *mfi; MemfdFileEntry *mfe; int fd, _fd; @@ -318,14 +317,13 @@ int memfd_open(struct file_desc *d, u32 *fdflags) goto err; /* Reopen the fd with original permissions */ - sprintf(lpath, "/proc/self/fd/%d", fd); flags = fdflags ? *fdflags : mfe->flags; /* * Ideally we should call compat version open() to not force the * O_LARGEFILE file flag with regular open(). It doesn't seem that * important though. */ - _fd = open(lpath, flags); + _fd = __open_proc(getpid(), 0, flags, "fd/%d", fd); if (_fd < 0) { pr_perror("Can't reopen memfd id=%d", mfe->id); goto err; From 9be35de2c8536627cff6dcfbc4887b4dea28550a Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 29 Feb 2020 09:51:33 +0300 Subject: [PATCH 234/277] memfd: don't corrupt a state of the dumped fd Right now, criu uses a dumped fd to dump content of a memfd "file". Here are two reasons why we should not do this: * a state of a dumped fd doesn't have to be changed, but now criu calls lseek on it. 
This can be worked around by using pread. * a dumped descriptor can be write-only. Reported-by: Mr Jenkins Cc: Nicolas Viennot Signed-off-by: Andrei Vagin --- criu/memfd.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/criu/memfd.c b/criu/memfd.c index 30ccdf22c9..983e01b388 100644 --- a/criu/memfd.c +++ b/criu/memfd.c @@ -119,6 +119,7 @@ static int dump_memfd_inode(int fd, struct memfd_inode *inode, static struct memfd_inode *dump_unique_memfd_inode(int lfd, const char *name, const struct stat *st) { struct memfd_inode *inode; + int fd; list_for_each_entry(inode, &memfd_inodes, list) if ((inode->dev == st->st_dev) && (inode->ino == st->st_ino)) @@ -132,10 +133,18 @@ static struct memfd_inode *dump_unique_memfd_inode(int lfd, const char *name, co inode->ino = st->st_ino; inode->id = memfd_inode_ids++; - if (dump_memfd_inode(lfd, inode, name, st)) { + fd = open_proc(PROC_SELF, "fd/%d", lfd); + if (fd < 0) { + xfree(inode); + return NULL; + } + + if (dump_memfd_inode(fd, inode, name, st)) { + close(fd); xfree(inode); return NULL; } + close(fd); list_add_tail(&inode->list, &memfd_inodes); From e333749bdaf7f9de39dba03a92e0377b6cd57682 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sun, 1 Mar 2020 01:04:20 +0300 Subject: [PATCH 235/277] zdtm/inhfd: force python to read new data from a file Python 2.7 doesn't call the read system call again once it has read a file to the end. The next seek allows working around this problem. inhfd/memfd.py hangs due to this issue. Reported-by: Mr Jenkins Signed-off-by: Andrei Vagin --- test/zdtm.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/zdtm.py b/test/zdtm.py index cac216411d..b037128dfd 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -697,7 +697,10 @@ def start(self): # regular files, so we loop. data = b'' while not data: - data = peer_file.read(16) + # In python 2.7, peer_file.read() doesn't call the read + # system call if it's read file to the end once.
The + # next seek allows to workaround this problem. + data = os.read(peer_file.fileno(), 16) time.sleep(0.1) except Exception as e: print("Unable to read a peer file: %s" % e) From 52c5dd5f061f0c2b3de56f929aa064aaea7c07de Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 7 Feb 2020 15:59:45 +0300 Subject: [PATCH 236/277] fown: Don't fail on dumping files opened with O_PATH O_PATH opened files are special: they have empty file operations in kernel space, so there is not that much we can do with them; even setting the position is not allowed. The same applies to the signal number in the owner settings. Signed-off-by: Cyrill Gorcunov Co-developed-by: Alexander Mikhalitsyn Signed-off-by: Alexander Mikhalitsyn (Virtuozzo) --- criu/files-reg.c | 16 +++++++--- criu/files.c | 5 ++- criu/pie/parasite.c | 75 +++++++++++++++++++++++++++++---------------- 3 files changed, 64 insertions(+), 32 deletions(-) diff --git a/criu/files-reg.c b/criu/files-reg.c index 8e36eaa33e..b65ad3e711 100644 --- a/criu/files-reg.c +++ b/criu/files-reg.c @@ -1817,11 +1817,17 @@ static int do_open_reg(int ns_root_fd, struct reg_file_info *rfi, void *arg) if (fd < 0) return fd; - if ((rfi->rfe->pos != -1ULL) && - lseek(fd, rfi->rfe->pos, SEEK_SET) < 0) { - pr_perror("Can't restore file pos"); - close(fd); - return -1; + /* + * O_PATH opened files carry empty fops in kernel, + * just ignore positioning at all.
+ */ + if (!(rfi->rfe->flags & O_PATH)) { + if (rfi->rfe->pos != -1ULL && + lseek(fd, rfi->rfe->pos, SEEK_SET) < 0) { + pr_perror("Can't restore file pos"); + close(fd); + return -1; + } } return fd; diff --git a/criu/files.c b/criu/files.c index f7963bf547..3f1d77931d 100644 --- a/criu/files.c +++ b/criu/files.c @@ -399,7 +399,10 @@ static int fill_fd_params(struct pid *owner_pid, int fd, int lfd, pr_info("%d fdinfo %d: pos: %#16"PRIx64" flags: %16o/%#x\n", owner_pid->real, fd, p->pos, p->flags, (int)p->fd_flags); - ret = fcntl(lfd, F_GETSIG, 0); + if (p->flags & O_PATH) + ret = 0; + else + ret = fcntl(lfd, F_GETSIG, 0); if (ret < 0) { pr_perror("Can't get owner signum on %d", lfd); return -1; diff --git a/criu/pie/parasite.c b/criu/pie/parasite.c index 387a976da0..64b5bbb3e2 100644 --- a/criu/pie/parasite.c +++ b/criu/pie/parasite.c @@ -317,15 +317,60 @@ static int dump_creds(struct parasite_dump_creds *args) return -1; } +static int fill_fds_fown(int fd, struct fd_opts *p) +{ + int flags, ret; + struct f_owner_ex owner_ex; + uint32_t v[2]; + + /* + * For O_PATH opened files there is no owner at all. + */ + flags = sys_fcntl(fd, F_GETFL, 0); + if (flags < 0) { + pr_err("fcntl(%d, F_GETFL) -> %d\n", fd, flags); + return -1; + } + if (flags & O_PATH) { + p->fown.pid = 0; + return 0; + } + + ret = sys_fcntl(fd, F_GETOWN_EX, (long)&owner_ex); + if (ret) { + pr_err("fcntl(%d, F_GETOWN_EX) -> %d\n", fd, ret); + return -1; + } + + /* + * Simple case -- nothing is changed. 
+ */ + if (owner_ex.pid == 0) { + p->fown.pid = 0; + return 0; + } + + ret = sys_fcntl(fd, F_GETOWNER_UIDS, (long)&v); + if (ret) { + pr_err("fcntl(%d, F_GETOWNER_UIDS) -> %d\n", fd, ret); + return -1; + } + + p->fown.uid = v[0]; + p->fown.euid = v[1]; + p->fown.pid_type = owner_ex.type; + p->fown.pid = owner_ex.pid; + + return 0; +} + static int fill_fds_opts(struct parasite_drain_fd *fds, struct fd_opts *opts) { int i; for (i = 0; i < fds->nr_fds; i++) { - int flags, fd = fds->fds[i], ret; + int flags, fd = fds->fds[i]; struct fd_opts *p = opts + i; - struct f_owner_ex owner_ex; - uint32_t v[2]; flags = sys_fcntl(fd, F_GETFD, 0); if (flags < 0) { @@ -335,30 +380,8 @@ static int fill_fds_opts(struct parasite_drain_fd *fds, struct fd_opts *opts) p->flags = (char)flags; - ret = sys_fcntl(fd, F_GETOWN_EX, (long)&owner_ex); - if (ret) { - pr_err("fcntl(%d, F_GETOWN_EX) -> %d\n", fd, ret); - return -1; - } - - /* - * Simple case -- nothing is changed. - */ - if (owner_ex.pid == 0) { - p->fown.pid = 0; - continue; - } - - ret = sys_fcntl(fd, F_GETOWNER_UIDS, (long)&v); - if (ret) { - pr_err("fcntl(%d, F_GETOWNER_UIDS) -> %d\n", fd, ret); + if (fill_fds_fown(fd, p)) return -1; - } - - p->fown.uid = v[0]; - p->fown.euid = v[1]; - p->fown.pid_type = owner_ex.type; - p->fown.pid = owner_ex.pid; } return 0; From a90e88d9fc42870a2acc5018d86649940fa7661c Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Fri, 7 Feb 2020 15:59:55 +0300 Subject: [PATCH 237/277] zdtm: add a test for files opened with O_PATH In this test, without the patch ("fown: Don't fail on dumping files opened with O_PATH"), we trigger these errors: Error (criu/pie/parasite.c:340): fcntl(4, F_GETOWN_EX) -> -9 Error (criu/files.c:403): Can't get owner signum on 18: Bad file descriptor Error (criu/files-reg.c:1887): Can't restore file pos: Bad file descriptor Signed-off-by: Pavel Tikhomirov Signed-off-by: Alexander Mikhalitsyn (Virtuozzo) --- test/zdtm/static/Makefile | 1 + test/zdtm/static/opath_file.c | 95
+++++++++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+) create mode 100644 test/zdtm/static/opath_file.c diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index 5afd18cd66..035b8fa9cf 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -363,6 +363,7 @@ TST_DIR = \ private_bind_propagation \ ghost_on_rofs \ overmounted_file \ + opath_file \ TST_DIR_FILE = \ chroot \ diff --git a/test/zdtm/static/opath_file.c b/test/zdtm/static/opath_file.c new file mode 100644 index 0000000000..602a5af27b --- /dev/null +++ b/test/zdtm/static/opath_file.c @@ -0,0 +1,95 @@ +#include +#include +#include +#include + +#include "zdtmtst.h" + +#define TEST_FILE "test_file" +#define BUF_SIZE 4096 +#define fdinfo_field(str, field) !strncmp(str, field":", sizeof(field)) +#define pr_debug(format, arg...) test_msg("DBG: %s:%d: " format, __FILE__, __LINE__, ## arg) + +const char *test_doc = "Check open file with O_PATH preserved"; +const char *test_author = "Pavel Tikhomirov "; + +char *dirname; +TEST_OPTION(dirname, string, "directory name", 1); + +struct fdinfo { + int flags; +}; + +static int parse_self_fdinfo(int fd, struct fdinfo *fi) +{ + char path[PATH_MAX], line[BUF_SIZE]; + FILE *file; + int ret = -1; + unsigned long long val; + + snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", fd); + file = fopen(path, "r"); + if (!file) { + pr_perror("fopen"); + return -1; + } + + while (fgets(line, sizeof(line), file)) { + if (fdinfo_field(line, "flags")) { + if (sscanf(line, "%*s %llu", &val) != 1) { + pr_err("failed to read flags: %s", line); + goto fail; + } + pr_debug("Open flags = %llu\n", val); + fi->flags = val; + ret = 0; + break; + } + } +fail: + fclose(file); + return ret; +} + +int main(int argc, char **argv) +{ + char test_file[PATH_MAX]; + struct fdinfo fi; + int fd; + + test_init(argc, argv); + + if (mkdir(dirname, 0700)) { + pr_perror("can't make directory %s", dirname); + exit(1); + } + + snprintf(test_file, sizeof(test_file), 
"%s/%s", dirname, TEST_FILE); + fd = creat(test_file, 0644); + if (fd == -1) { + pr_perror("cat't create %s", test_file); + return 1; + } + close(fd); + + fd = open(test_file, O_PATH); + if (fd == -1) { + pr_perror("cat't open file %s with O_PATH", test_file); + return 1; + } + + test_daemon(); + test_waitsig(); + + if (parse_self_fdinfo(fd, &fi)) + return 1; + + if (!(fi.flags & O_PATH)) { + fail("File lost O_PATH open flag"); + return 1; + } + + close(fd); + pass(); + return 0; +} From 657268d8fcfbbdefa73495e63b6439f4dd16a0c7 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Fri, 7 Feb 2020 16:00:01 +0300 Subject: [PATCH 238/277] files: allow dumping opened symlinks To really open symlink file and not the regular file below it, one needs to do open with O_PATH|O_NOFOLLOW flags. Looks like systemd started to open /etc/localtime symlink this way sometimes, and before that nobody actually used this and thus we never supported this in CRIU. Error (criu/files-ext.c:96): Can't dump file 11 of that type [120777] (unknown /etc/localtime) Looks like it is quiet easy to support, as c/r of symlink file is almost the same as c/r of regular one. We need to only make fstatat not following links in check_path_remap. Also we need to take into account support of ghost symlinks. Signed-off-by: Alexander Mikhalitsyn (Virtuozzo) Co-developed-by: Pavel Tikhomirov Signed-off-by: Pavel Tikhomirov --- criu/files-reg.c | 88 +++++++++++++++++++++++++++++++++++++---- criu/files.c | 3 +- images/ghost-file.proto | 2 + 3 files changed, 84 insertions(+), 9 deletions(-) diff --git a/criu/files-reg.c b/criu/files-reg.c index b65ad3e711..1d24cc526e 100644 --- a/criu/files-reg.c +++ b/criu/files-reg.c @@ -323,19 +323,53 @@ static int mkreg_ghost(char *path, GhostFileEntry *gfe, struct cr_img *img) return ret; } +static int mklnk_ghost(char *path, GhostFileEntry *gfe) +{ + if (!gfe->symlnk_target) { + pr_err("Ghost symlink target is NULL for %s. 
Image from old CRIU?\n", path); + return -1; + } + + if (symlink(gfe->symlnk_target, path) < 0) { + /* + * ENOENT case is OK + * Take a look closer on create_ghost() function + */ + if (errno != ENOENT) + pr_perror("symlink(%s, %s) failed", gfe->symlnk_target, path); + return -1; + } + + return 0; +} + static int ghost_apply_metadata(const char *path, GhostFileEntry *gfe) { struct timeval tv[2]; int ret = -1; - if (chown(path, gfe->uid, gfe->gid) < 0) { - pr_perror("Can't reset user/group on ghost %s", path); - goto err; - } + if (S_ISLNK(gfe->mode)) { + if (lchown(path, gfe->uid, gfe->gid) < 0) { + pr_perror("Can't reset user/group on ghost %s", path); + goto err; + } - if (chmod(path, gfe->mode)) { - pr_perror("Can't set perms %o on ghost %s", gfe->mode, path); - goto err; + /* + * We have no lchmod() function, and fchmod() will fail on + * O_PATH | O_NOFOLLOW fd. Yes, we have fchmodat() + * function and flag AT_SYMLINK_NOFOLLOW described in + * man 2 fchmodat, but it is not currently implemented. 
%) + */ + } else { + if (chown(path, gfe->uid, gfe->gid) < 0) { + pr_perror("Can't reset user/group on ghost %s", path); + goto err; + } + + if (chmod(path, gfe->mode)) { + pr_perror("Can't set perms %o on ghost %s", gfe->mode, path); + goto err; + } } if (gfe->atim) { @@ -394,6 +428,9 @@ static int create_ghost(struct ghost_file *gf, GhostFileEntry *gfe, struct cr_im } else if (S_ISDIR(gfe->mode)) { if ((ret = mkdirpat(AT_FDCWD, path, gfe->mode)) < 0) msg = "Can't make ghost dir"; + } else if (S_ISLNK(gfe->mode)) { + if ((ret = mklnk_ghost(path, gfe)) < 0) + msg = "Can't create ghost symlink"; } else { if ((ret = mkreg_ghost(path, gfe, img)) < 0) msg = "Can't create ghost regfile"; @@ -781,6 +818,7 @@ static int dump_ghost_file(int _fd, u32 id, const struct stat *st, dev_t phys_de int exit_code = -1; GhostFileEntry gfe = GHOST_FILE_ENTRY__INIT; Timeval atim = TIMEVAL__INIT, mtim = TIMEVAL__INIT; + char pathbuf[PATH_MAX]; pr_info("Dumping ghost file contents (id %#x)\n", id); @@ -814,6 +852,36 @@ static int dump_ghost_file(int _fd, u32 id, const struct stat *st, dev_t phys_de gfe.size = st->st_size; } + /* + * We set gfe.symlnk_target only if we need to dump + * symlink content, otherwise we leave it NULL. + * It will be taken into account on restore in mklnk_ghost function. + */ + if (S_ISLNK(st->st_mode)) { + ssize_t ret; + + /* + * We assume that _fd opened with O_PATH | O_NOFOLLOW + * flags because S_ISLNK(st->st_mode). With current kernel version, + * it's looks like correct assumption in any case. 
+ */ + ret = readlinkat(_fd, "", pathbuf, sizeof(pathbuf) - 1); + if (ret < 0) { + pr_perror("Can't readlinkat"); + goto err_out; + } + + pathbuf[ret] = 0; + + if (ret != st->st_size) { + pr_err("Buffer for readlinkat is too small: ret %zd, st_size %"PRId64", buf %u %s\n", + ret, st->st_size, PATH_MAX, pathbuf); + goto err_out; + } + + gfe.symlnk_target = pathbuf; + } + if (pb_write_one(img, &gfe, PB_GHOST_FILE)) goto err_out; @@ -1157,6 +1225,7 @@ static int check_path_remap(struct fd_link *link, const struct fd_parms *parms, int ret, mntns_root; struct stat pst; const struct stat *ost = &parms->stat; + int flags = 0; if (parms->fs_type == PROC_SUPER_MAGIC) { /* The file points to /proc/pid/ where pid is a dead @@ -1253,7 +1322,10 @@ static int check_path_remap(struct fd_link *link, const struct fd_parms *parms, if (mntns_root < 0) return -1; - ret = fstatat(mntns_root, rpath, &pst, 0); + if (S_ISLNK(parms->stat.st_mode)) + flags = AT_SYMLINK_NOFOLLOW; + + ret = fstatat(mntns_root, rpath, &pst, flags); if (ret < 0) { /* * Linked file, but path is not accessible (unless any diff --git a/criu/files.c b/criu/files.c index 3f1d77931d..f6ba39a306 100644 --- a/criu/files.c +++ b/criu/files.c @@ -545,7 +545,8 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts, return do_dump_gen_file(&p, lfd, ops, e); } - if (S_ISREG(p.stat.st_mode) || S_ISDIR(p.stat.st_mode)) { + if (S_ISREG(p.stat.st_mode) || S_ISDIR(p.stat.st_mode) || + S_ISLNK(p.stat.st_mode)) { if (fill_fdlink(lfd, &p, &link)) return -1; diff --git a/images/ghost-file.proto b/images/ghost-file.proto index eda4664517..0576089fdd 100644 --- a/images/ghost-file.proto +++ b/images/ghost-file.proto @@ -15,6 +15,8 @@ message ghost_file_entry { optional timeval mtim = 8; optional bool chunks = 9; optional uint64 size = 10; + /* this field makes sense only when S_ISLNK(mode) */ + optional string symlnk_target = 11; } message ghost_chunk_entry { From 27807fb5ceecea572d3e9e414a6e5f2c0ff9f0f0 
Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Fri, 7 Feb 2020 16:00:05 +0300 Subject: [PATCH 239/277] zdtm: add a test on open symlink migration Signed-off-by: Pavel Tikhomirov Co-Developed-by: Vitaly Ostrosablin Signed-off-by: Vitaly Ostrosablin Signed-off-by: Alexander Mikhalitsyn (Virtuozzo) --- test/zdtm/static/Makefile | 3 + test/zdtm/static/opath_file.c | 2 +- test/zdtm/static/symlink.c | 102 ++++++++++++++++++++++++++++++++++ test/zdtm/static/symlink01.c | 1 + 4 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 test/zdtm/static/symlink.c create mode 120000 test/zdtm/static/symlink01.c diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index 035b8fa9cf..ee69612c77 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -364,6 +364,8 @@ TST_DIR = \ ghost_on_rofs \ overmounted_file \ opath_file \ + symlink \ + symlink01 \ TST_DIR_FILE = \ chroot \ @@ -539,6 +541,7 @@ clone_fs: LDLIBS += -pthread # we have to explicitly specify both .o and .d for this case: netns_sub_veth.o netns_sub_veth.d: CPPFLAGS += $(call pkg-cflags, libnl-3.0) netns_sub_veth: LDLIBS += $(call pkg-libs, libnl-route-3.0 libnl-3.0) +symlink01: CFLAGS += -DZDTM_UNLINK_SYMLINK socket-tcp-fin-wait1: CFLAGS += -D ZDTM_TCP_FIN_WAIT1 socket-tcp-fin-wait2: CFLAGS += -D ZDTM_TCP_FIN_WAIT2 diff --git a/test/zdtm/static/opath_file.c b/test/zdtm/static/opath_file.c index 602a5af27b..943f4eddb6 100644 --- a/test/zdtm/static/opath_file.c +++ b/test/zdtm/static/opath_file.c @@ -36,7 +36,7 @@ static int parse_self_fdinfo(int fd, struct fdinfo *fi) while (fgets(line, sizeof(line), file)) { if (fdinfo_field(line, "flags")) { - if (sscanf(line, "%*s %llu", &val) != 1) { + if (sscanf(line, "%*s %llo", &val) != 1) { pr_err("failed to read flags: %s", line); goto fail; } diff --git a/test/zdtm/static/symlink.c b/test/zdtm/static/symlink.c new file mode 100644 index 0000000000..074c800522 --- /dev/null +++ b/test/zdtm/static/symlink.c @@ -0,0 +1,102 @@ 
+#include +#include +#include +#include +#include +#include + +#include "zdtmtst.h" + +#define TEST_FILE "test_file" +#define TEST_SYMLINK "test_symlink" + +const char *test_doc = "Check open symlink preserved"; +const char *test_author = "Pavel Tikhomirov "; + +char *dirname; +TEST_OPTION(dirname, string, "directory name", 1); + +int main(int argc, char **argv) +{ + char test_symlink[PATH_MAX]; + char test_file[PATH_MAX]; + char pathbuf[PATH_MAX]; + struct stat stb, sta; + int ret, fd; + + test_init(argc, argv); + + if (mkdir(dirname, 0700)) { + pr_perror("can't make directory %s", dirname); + exit(1); + } + + snprintf(test_file, sizeof(test_file), "%s/%s", dirname, TEST_FILE); + ret = creat(test_file, 0644); + if (ret == -1) { + pr_perror("cat't create %s", test_file); + return 1; + } + close(ret); + + snprintf(test_symlink, sizeof(test_symlink), "%s/%s", dirname, TEST_SYMLINK); + ret = symlink(test_file, test_symlink); + if (ret == -1) { + pr_perror("cat't symlink to %s", test_symlink); + return 1; + } + + fd = open(test_symlink, O_PATH | O_NOFOLLOW); + if (fd == -1) { + pr_perror("cat't open symlink %s", test_symlink); + return 1; + } + + ret = fstat(fd, &sta); + if (ret == -1) { + pr_perror("cat't fstat %s", test_symlink); + return 1; + } + + if (!S_ISLNK(sta.st_mode)) { + pr_perror("file is not symlink %s", test_symlink); + return 1; + } + +#ifdef ZDTM_UNLINK_SYMLINK + if (unlink(test_symlink)) { + pr_perror("can't unlink symlink %s", test_symlink); + return 1; + } +#endif + + test_daemon(); + test_waitsig(); + + ret = fstat(fd, &stb); + if (ret == -1) { + fail("cat't fstat %s", test_symlink); + return 1; + } + + if (!S_ISLNK(stb.st_mode)) { + fail("file is not symlink %s", test_symlink); + return 1; + } + + ret = readlinkat(fd, "", pathbuf, sizeof(pathbuf) - 1); + if (ret < 0) { + fail("Can't readlinkat"); + return 1; + } + pathbuf[ret] = 0; + + if (strcmp(test_file, pathbuf)) { + fail("symlink points to %s but %s expected", pathbuf, test_file); + return 1; 
+ } + + close(fd); + pass(); + return 0; +} diff --git a/test/zdtm/static/symlink01.c b/test/zdtm/static/symlink01.c new file mode 120000 index 0000000000..e2d071ea4c --- /dev/null +++ b/test/zdtm/static/symlink01.c @@ -0,0 +1 @@ +symlink.c \ No newline at end of file From 84ad71837e8dbd518706345246d3a6d72f75d6a4 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 5 Mar 2020 08:30:23 +0300 Subject: [PATCH 240/277] zdtm/fifo_loop: don't try to write more than pipe size ... otherwise write() can block. Reported-by: Mr Jenkins Signed-off-by: Andrei Vagin --- test/zdtm/transition/fifo_loop.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/test/zdtm/transition/fifo_loop.c b/test/zdtm/transition/fifo_loop.c index 2e28320ba6..b028c2fd5f 100644 --- a/test/zdtm/transition/fifo_loop.c +++ b/test/zdtm/transition/fifo_loop.c @@ -39,6 +39,7 @@ int main(int argc, char **argv) int i; uint8_t buf[0x100000]; char *file_path; + int pipe_size; test_init(argc, argv); @@ -104,6 +105,13 @@ int main(int argc, char **argv) exit(1); } + pipe_size = fcntl(writefd, F_SETPIPE_SZ, sizeof(buf)); + if (pipe_size != sizeof(buf)) { + pr_perror("fcntl(writefd, F_GETPIPE_SZ) -> %d", pipe_size); + kill(0, SIGKILL); + exit(1); + } + file_path = path[i - 1]; readfd = open(file_path, O_RDONLY); if (readfd < 0) { @@ -138,13 +146,14 @@ int main(int argc, char **argv) for (p = rbuf, len = wlen; len > 0; p += rlen, len -= rlen) { rlen = read(readfd, p, len); + if (rlen < 0 && errno == EINTR) { + continue; + } + if (rlen <= 0) break; } - if (rlen < 0 && errno == EINTR) - continue; - if (len > 0) { fail("read failed: %m\n"); ret = 1; From 8ca165e94c69a1590d1507314a1fbffbc7503c0b Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 5 Mar 2020 14:45:34 +0000 Subject: [PATCH 241/277] criu: Remove compel.h includes The plan is to remove "compel.h". That file only includes other headers (which may be not needed). 
If we aim for one-include-for-compel, we could instead paste all subheaders into "compel.h". Rather, I think it's worth to migrate to more fine-grained compel headers than follow the strategy 'one header to rule them all'. Further, the header creates problems for cross-compilation: it's included in files, those are used by host-compel. Which rightfully confuses compiler/linker as host's definitions for fpu regs/other platform details get drained into host's compel. As a first step - stop including "compel.h" in criu. Signed-off-by: Dmitry Safonov --- criu/aio.c | 2 +- criu/arch/aarch64/crtools.c | 2 +- criu/arch/arm/crtools.c | 3 +-- criu/arch/ppc64/crtools.c | 2 +- criu/arch/s390/crtools.c | 2 +- criu/arch/x86/crtools.c | 2 +- criu/arch/x86/sys-exec-tbl.c | 1 - criu/cr-restore.c | 1 - criu/include/proc_parse.h | 2 +- criu/kerndat.c | 1 - criu/mem.c | 2 +- criu/parasite-syscall.c | 2 -- criu/pie/pie-relocs.h | 2 -- criu/seize.c | 1 - criu/vdso.c | 1 - 15 files changed, 8 insertions(+), 18 deletions(-) diff --git a/criu/aio.c b/criu/aio.c index 45651f2d3f..6ee65d5f4e 100644 --- a/criu/aio.c +++ b/criu/aio.c @@ -11,7 +11,7 @@ #include "parasite.h" #include "parasite-syscall.h" #include "images/mm.pb-c.h" -#include +#include "compel/infect.h" #define NR_IOEVENTS_IN_NPAGES(npages) ((PAGE_SIZE * (npages) - sizeof(struct aio_ring)) / sizeof(struct io_event)) diff --git a/criu/arch/aarch64/crtools.c b/criu/arch/aarch64/crtools.c index f98743a23b..76bd1fea75 100644 --- a/criu/arch/aarch64/crtools.c +++ b/criu/arch/aarch64/crtools.c @@ -19,7 +19,7 @@ #include "util.h" #include "cpu.h" #include "restorer.h" -#include +#include "compel/infect.h" #define assign_reg(dst, src, e) dst->e = (__typeof__(dst->e))(src)->e diff --git a/criu/arch/arm/crtools.c b/criu/arch/arm/crtools.c index c216cdc5c0..840d489a65 100644 --- a/criu/arch/arm/crtools.c +++ b/criu/arch/arm/crtools.c @@ -18,8 +18,7 @@ #include "elf.h" #include "parasite-syscall.h" #include "restorer.h" - -#include 
+#include "compel/infect.h" #define assign_reg(dst, src, e) dst->e = (__typeof__(dst->e))((src)->ARM_##e) diff --git a/criu/arch/ppc64/crtools.c b/criu/arch/ppc64/crtools.c index 5a5966ad48..0d9f49c3fe 100644 --- a/criu/arch/ppc64/crtools.c +++ b/criu/arch/ppc64/crtools.c @@ -17,7 +17,7 @@ #include "log.h" #include "util.h" #include "cpu.h" -#include +#include "compel/infect.h" #include "protobuf.h" #include "images/core.pb-c.h" diff --git a/criu/arch/s390/crtools.c b/criu/arch/s390/crtools.c index 238035b763..000b7779f8 100644 --- a/criu/arch/s390/crtools.c +++ b/criu/arch/s390/crtools.c @@ -17,7 +17,7 @@ #include "log.h" #include "util.h" #include "cpu.h" -#include +#include "compel/infect.h" #include "protobuf.h" #include "images/core.pb-c.h" diff --git a/criu/arch/x86/crtools.c b/criu/arch/x86/crtools.c index e4073c27b6..9c8beeeddd 100644 --- a/criu/arch/x86/crtools.c +++ b/criu/arch/x86/crtools.c @@ -1,5 +1,5 @@ #include "compel/asm/fpu.h" -#include "compel/compel.h" +#include "compel/infect.h" #include "compel/plugins/std/syscall-codes.h" #include "cpu.h" #include "cr_options.h" diff --git a/criu/arch/x86/sys-exec-tbl.c b/criu/arch/x86/sys-exec-tbl.c index 608dc2510d..225b8a1535 100644 --- a/criu/arch/x86/sys-exec-tbl.c +++ b/criu/arch/x86/sys-exec-tbl.c @@ -1,4 +1,3 @@ -#include static struct syscall_exec_desc sc_exec_table_64[] = { #include "sys-exec-tbl-64.c" diff --git a/criu/cr-restore.c b/criu/cr-restore.c index fa12db98e3..e5a827753a 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -68,7 +68,6 @@ #include "timerfd.h" #include "action-scripts.h" #include "shmem.h" -#include #include "aio.h" #include "lsm.h" #include "seccomp.h" diff --git a/criu/include/proc_parse.h b/criu/include/proc_parse.h index 96a097b3d8..fd50ff47e1 100644 --- a/criu/include/proc_parse.h +++ b/criu/include/proc_parse.h @@ -3,7 +3,7 @@ #include -#include +#include "compel/infect.h" #define PROC_TASK_COMM_LEN 32 #define PROC_TASK_COMM_LEN_FMT "(%31s" diff --git 
a/criu/kerndat.c b/criu/kerndat.c index 8ac83820b9..2ad72c3505 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -33,7 +33,6 @@ #include "net.h" #include "tun.h" #include -#include #include "netfilter.h" #include "fsnotify.h" #include "linux/userfaultfd.h" diff --git a/criu/mem.c b/criu/mem.c index 4e110c9e96..55022d94a2 100644 --- a/criu/mem.c +++ b/criu/mem.c @@ -29,7 +29,7 @@ #include "pagemap-cache.h" #include "fault-injection.h" #include "prctl.h" -#include +#include "compel/infect-util.h" #include "protobuf.h" #include "images/pagemap.pb-c.h" diff --git a/criu/parasite-syscall.c b/criu/parasite-syscall.c index e5a8194e58..b649d1b51a 100644 --- a/criu/parasite-syscall.c +++ b/criu/parasite-syscall.c @@ -45,8 +45,6 @@ #include "infect-rpc.h" #include "pie/parasite-blob.h" -#include - unsigned long get_exec_start(struct vm_area_list *vmas) { struct vma_area *vma_area; diff --git a/criu/pie/pie-relocs.h b/criu/pie/pie-relocs.h index 6797486c2d..e36126be60 100644 --- a/criu/pie/pie-relocs.h +++ b/criu/pie/pie-relocs.h @@ -1,8 +1,6 @@ #ifndef __PIE_RELOCS_H__ #define __PIE_RELOCS_H__ -#include - #include "common/config.h" #include "common/compiler.h" diff --git a/criu/seize.c b/criu/seize.c index 0ba2d9b1db..f973806d99 100644 --- a/criu/seize.c +++ b/criu/seize.c @@ -23,7 +23,6 @@ #include "string.h" #include "xmalloc.h" #include "util.h" -#include #define NR_ATTEMPTS 5 diff --git a/criu/vdso.c b/criu/vdso.c index 19ba4765df..433a547286 100644 --- a/criu/vdso.c +++ b/criu/vdso.c @@ -20,7 +20,6 @@ #include "criu-log.h" #include "mem.h" #include "vma.h" -#include #include #ifdef LOG_PREFIX From fb386c23b184f507f6705b8a34c69d3eea1ea548 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 5 Mar 2020 15:00:08 +0000 Subject: [PATCH 242/277] compel: Remove compel.h The file only includes other headers (which may be not needed). If we aim for one-include-for-compel, we could instead paste all subheaders into "compel.h". 
Rather, I think it's worth to migrate to more fine-grained compel headers than follow the strategy 'one header to rule them all'. Further, the header creates problems for cross-compilation: it's included in files, those are used by host-compel. Which rightfully confuses compiler/linker as host's definitions for fpu regs/other platform details get drained into host's compel. Signed-off-by: Dmitry Safonov --- Documentation/compel.txt | 2 +- compel/arch/aarch64/src/lib/handle-elf.c | 3 +-- compel/arch/arm/src/lib/handle-elf.c | 3 +-- compel/arch/arm/src/lib/infect.c | 1 + compel/arch/ppc64/src/lib/cpu.c | 1 + compel/arch/ppc64/src/lib/handle-elf.c | 3 +-- compel/arch/s390/src/lib/handle-elf.c | 3 +-- compel/arch/s390/src/lib/infect.c | 1 + compel/arch/x86/src/lib/handle-elf.c | 3 +-- compel/arch/x86/src/lib/infect.c | 1 + compel/include/log.h | 3 +-- compel/include/uapi/compel.h | 14 -------------- compel/src/lib/handle-elf.c | 6 ++---- compel/src/lib/log.c | 3 --- compel/src/main.c | 2 -- compel/test/fdspy/spy.c | 1 - compel/test/infect/spy.c | 1 - compel/test/rsys/spy.c | 2 -- include/common/scm.h | 2 ++ 19 files changed, 15 insertions(+), 40 deletions(-) delete mode 100644 compel/include/uapi/compel.h diff --git a/Documentation/compel.txt b/Documentation/compel.txt index 744a3b35db..6ccd208615 100644 --- a/Documentation/compel.txt +++ b/Documentation/compel.txt @@ -86,7 +86,7 @@ Infecting code ~~~~~~~~~~~~~~ The parasitic code is compiled and converted to a header using *compel*, and included here. 
-*#include * +*#include * *#include "parasite.h"* diff --git a/compel/arch/aarch64/src/lib/handle-elf.c b/compel/arch/aarch64/src/lib/handle-elf.c index 1c3686c484..1ee65ee2ca 100644 --- a/compel/arch/aarch64/src/lib/handle-elf.c +++ b/compel/arch/aarch64/src/lib/handle-elf.c @@ -1,6 +1,5 @@ #include - -#include "uapi/compel.h" +#include #include "handle-elf.h" #include "piegen.h" diff --git a/compel/arch/arm/src/lib/handle-elf.c b/compel/arch/arm/src/lib/handle-elf.c index 8abf8dad1d..5b8d00a6f7 100644 --- a/compel/arch/arm/src/lib/handle-elf.c +++ b/compel/arch/arm/src/lib/handle-elf.c @@ -1,6 +1,5 @@ #include - -#include "uapi/compel.h" +#include #include "handle-elf.h" #include "piegen.h" diff --git a/compel/arch/arm/src/lib/infect.c b/compel/arch/arm/src/lib/infect.c index c17cb9c9b6..0053bef581 100644 --- a/compel/arch/arm/src/lib/infect.c +++ b/compel/arch/arm/src/lib/infect.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include "common/page.h" diff --git a/compel/arch/ppc64/src/lib/cpu.c b/compel/arch/ppc64/src/lib/cpu.c index 338ab4891f..7a39727908 100644 --- a/compel/arch/ppc64/src/lib/cpu.c +++ b/compel/arch/ppc64/src/lib/cpu.c @@ -2,6 +2,7 @@ #include #include #include +#include #include "compel-cpu.h" diff --git a/compel/arch/ppc64/src/lib/handle-elf.c b/compel/arch/ppc64/src/lib/handle-elf.c index 3d4020f597..f29fdc8a39 100644 --- a/compel/arch/ppc64/src/lib/handle-elf.c +++ b/compel/arch/ppc64/src/lib/handle-elf.c @@ -1,6 +1,5 @@ #include - -#include "uapi/compel.h" +#include #include "handle-elf.h" #include "piegen.h" diff --git a/compel/arch/s390/src/lib/handle-elf.c b/compel/arch/s390/src/lib/handle-elf.c index 01a8bf4c8b..6ed382c92f 100644 --- a/compel/arch/s390/src/lib/handle-elf.c +++ b/compel/arch/s390/src/lib/handle-elf.c @@ -1,6 +1,5 @@ #include - -#include "uapi/compel.h" +#include #include "handle-elf.h" #include "piegen.h" diff --git a/compel/arch/s390/src/lib/infect.c b/compel/arch/s390/src/lib/infect.c index 
7e7d24ce21..5a4675449d 100644 --- a/compel/arch/s390/src/lib/infect.c +++ b/compel/arch/s390/src/lib/infect.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/compel/arch/x86/src/lib/handle-elf.c b/compel/arch/x86/src/lib/handle-elf.c index 62fb28f494..938999b2e1 100644 --- a/compel/arch/x86/src/lib/handle-elf.c +++ b/compel/arch/x86/src/lib/handle-elf.c @@ -1,6 +1,5 @@ #include - -#include "uapi/compel.h" +#include #include "handle-elf.h" #include "piegen.h" diff --git a/compel/arch/x86/src/lib/infect.c b/compel/arch/x86/src/lib/infect.c index 11e7f4c91f..9c4abb60c2 100644 --- a/compel/arch/x86/src/lib/infect.c +++ b/compel/arch/x86/src/lib/infect.c @@ -3,6 +3,7 @@ #include #include #include +#include #include diff --git a/compel/include/log.h b/compel/include/log.h index 559f909cea..49e65bb507 100644 --- a/compel/include/log.h +++ b/compel/include/log.h @@ -1,8 +1,7 @@ #ifndef COMPEL_LOG_H__ #define COMPEL_LOG_H__ -#include "uapi/compel/compel.h" -#include "uapi/compel/loglevels.h" +#include "uapi/compel/log.h" #ifndef LOG_PREFIX # define LOG_PREFIX diff --git a/compel/include/uapi/compel.h b/compel/include/uapi/compel.h deleted file mode 100644 index 318a472da9..0000000000 --- a/compel/include/uapi/compel.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef UAPI_COMPEL_H__ -#define UAPI_COMPEL_H__ - -#include -#include - -#include -#include -#include -#include -#include -#include - -#endif /* UAPI_COMPEL_H__ */ diff --git a/compel/src/lib/handle-elf.c b/compel/src/lib/handle-elf.c index ca7c53b711..69d5104b66 100644 --- a/compel/src/lib/handle-elf.c +++ b/compel/src/lib/handle-elf.c @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include @@ -12,8 +12,6 @@ #include #include -#include "uapi/compel.h" - #include "handle-elf.h" #include "piegen.h" #include "log.h" @@ -228,7 +226,7 @@ int __handle_elf(void *mem, size_t size) } pr_out("/* Autogenerated from %s */\n", opts.input_filename); - pr_out("#include \n"); + 
pr_out("#include \n"); for (i = 0; i < symtab_hdr->sh_size / symtab_hdr->sh_entsize; i++) { Elf_Sym *sym = &symbols[i]; diff --git a/compel/src/lib/log.c b/compel/src/lib/log.c index d195343e45..c86be02c5f 100644 --- a/compel/src/lib/log.c +++ b/compel/src/lib/log.c @@ -4,11 +4,8 @@ #include #include #include - #include -#include - #include "log.h" static unsigned int current_loglevel = COMPEL_DEFAULT_LOGLEVEL; diff --git a/compel/src/main.c b/compel/src/main.c index 8b2c8bc8d9..36127c357d 100644 --- a/compel/src/main.c +++ b/compel/src/main.c @@ -13,8 +13,6 @@ #include #include -#include "uapi/compel/compel.h" - #include "version.h" #include "piegen.h" #include "log.h" diff --git a/compel/test/fdspy/spy.c b/compel/test/fdspy/spy.c index 258e3ab75b..1a373b6bb0 100644 --- a/compel/test/fdspy/spy.c +++ b/compel/test/fdspy/spy.c @@ -5,7 +5,6 @@ #include #include -#include #include "parasite.h" #define PARASITE_CMD_GETFD PARASITE_USER_CMDS diff --git a/compel/test/infect/spy.c b/compel/test/infect/spy.c index a5aba73089..b5f8b25593 100644 --- a/compel/test/infect/spy.c +++ b/compel/test/infect/spy.c @@ -3,7 +3,6 @@ #include #include -#include #include "parasite.h" #define PARASITE_CMD_INC PARASITE_USER_CMDS diff --git a/compel/test/rsys/spy.c b/compel/test/rsys/spy.c index f5c999d5a2..98654efcf3 100644 --- a/compel/test/rsys/spy.c +++ b/compel/test/rsys/spy.c @@ -4,8 +4,6 @@ #include #include -#include - static void print_vmsg(unsigned int lvl, const char *fmt, va_list parms) { printf("\tLC%u: ", lvl); diff --git a/include/common/scm.h b/include/common/scm.h index ab27137b82..a8eb9ec4c7 100644 --- a/include/common/scm.h +++ b/include/common/scm.h @@ -3,7 +3,9 @@ #include #include +#include #include +#include /* * Because of kernel doing kmalloc for user data passed From e419f2e5c9d01e765cbe2c95d2fc7b76a02d66d4 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 5 Mar 2020 15:04:15 +0000 Subject: [PATCH 243/277] travis: Add aarch64-cross test on amd64 Fixes: #924 
Signed-off-by: Dmitry Safonov --- .travis.yml | 4 +++ scripts/build/Dockerfile.aarch64-cross | 45 ++++++++++++++++++++++++++ scripts/build/Makefile | 2 +- 3 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 scripts/build/Dockerfile.aarch64-cross diff --git a/.travis.yml b/.travis.yml index 7c36af0064..ffa82f15f6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -86,6 +86,10 @@ jobs: arch: amd64 env: TR_ARCH=armv7-cross dist: bionic + - os: linux + arch: amd64 + env: TR_ARCH=aarch64-cross + dist: bionic allow_failures: - env: TR_ARCH=docker-test - env: TR_ARCH=docker-test DIST=xenial diff --git a/scripts/build/Dockerfile.aarch64-cross b/scripts/build/Dockerfile.aarch64-cross new file mode 100644 index 0000000000..38229497a9 --- /dev/null +++ b/scripts/build/Dockerfile.aarch64-cross @@ -0,0 +1,45 @@ +FROM dockcross/base:latest + +# Add the cross compiler sources +RUN echo "deb http://ftp.us.debian.org/debian/ jessie main" >> /etc/apt/sources.list && \ + dpkg --add-architecture arm64 && \ + apt-get install emdebian-archive-keyring + +RUN apt-get update && apt-get install -y \ + crossbuild-essential-arm64 \ + libc6-dev-arm64-cross \ + libc6-arm64-cross \ + libbz2-dev:arm64 \ + libexpat1-dev:arm64 \ + ncurses-dev:arm64 \ + libssl-dev:arm64 \ + protobuf-c-compiler \ + protobuf-compiler \ + python-protobuf \ + libnl-3-dev:arm64 \ + libprotobuf-dev:arm64 \ + libnet-dev:arm64 \ + libprotobuf-c-dev:arm64 \ + libcap-dev:arm64 \ + libaio-dev:arm64 \ + libnl-route-3-dev:arm64 + +ENV CROSS_TRIPLE=aarch64-linux-gnu +ENV CROSS_COMPILE=${CROSS_TRIPLE}- \ + CROSS_ROOT=/usr/${CROSS_TRIPLE} \ + AS=/usr/bin/${CROSS_TRIPLE}-as \ + AR=/usr/bin/${CROSS_TRIPLE}-ar \ + CC=/usr/bin/${CROSS_TRIPLE}-gcc \ + CPP=/usr/bin/${CROSS_TRIPLE}-cpp \ + CXX=/usr/bin/${CROSS_TRIPLE}-g++ \ + LD=/usr/bin/${CROSS_TRIPLE}-ld \ + FC=/usr/bin/${CROSS_TRIPLE}-gfortran + +ENV PATH="${PATH}:${CROSS_ROOT}/bin" \ + PKG_CONFIG_PATH=/usr/lib/${CROSS_TRIPLE}/pkgconfig \ + ARCH=aarch64 + +COPY . 
/criu +WORKDIR /criu + +RUN make mrproper && date && make -j $(nproc) zdtm && date diff --git a/scripts/build/Makefile b/scripts/build/Makefile index d093ce76c9..913a86d6c2 100644 --- a/scripts/build/Makefile +++ b/scripts/build/Makefile @@ -2,7 +2,7 @@ ARCHES := x86_64 fedora-asan fedora-rawhide centos armv7hf TARGETS := $(ARCHES) alpine TARGETS_CLANG := $(addsuffix $(TARGETS),-clang) CONTAINER_RUNTIME := docker -TARGETS += armv7-cross +TARGETS += armv7-cross aarch64-cross all: $(TARGETS) $(TARGETS_CLANG) .PHONY: all From 037aaf9e525f83cd3a7989bbe31eb9c2b219b809 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 5 Mar 2020 16:43:48 +0000 Subject: [PATCH 244/277] travis: Use debian/buster as base for cross build tests Jessie is called 'oldoldstable', migrate to Buster. Suggested-by: Adrian Reber Signed-off-by: Dmitry Safonov --- scripts/build/Dockerfile.aarch64-cross | 2 +- scripts/build/Dockerfile.armv7-cross | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/build/Dockerfile.aarch64-cross b/scripts/build/Dockerfile.aarch64-cross index 38229497a9..252e0f8754 100644 --- a/scripts/build/Dockerfile.aarch64-cross +++ b/scripts/build/Dockerfile.aarch64-cross @@ -1,7 +1,7 @@ FROM dockcross/base:latest # Add the cross compiler sources -RUN echo "deb http://ftp.us.debian.org/debian/ jessie main" >> /etc/apt/sources.list && \ +RUN echo "deb http://ftp.us.debian.org/debian/ buster main" >> /etc/apt/sources.list && \ dpkg --add-architecture arm64 && \ apt-get install emdebian-archive-keyring diff --git a/scripts/build/Dockerfile.armv7-cross b/scripts/build/Dockerfile.armv7-cross index 434934aad1..17a55561ec 100644 --- a/scripts/build/Dockerfile.armv7-cross +++ b/scripts/build/Dockerfile.armv7-cross @@ -1,7 +1,7 @@ FROM dockcross/base:latest # Add the cross compiler sources -RUN echo "deb http://ftp.us.debian.org/debian/ jessie main" >> /etc/apt/sources.list && \ +RUN echo "deb http://ftp.us.debian.org/debian/ buster main" >> 
/etc/apt/sources.list && \ dpkg --add-architecture armhf && \ apt-get install emdebian-archive-keyring From 717de3e80a79bce106a66c932bcf0e0f1329def5 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Tue, 10 Mar 2020 17:40:57 +0300 Subject: [PATCH 245/277] crit-recode: skip (not try to parse) nftables raw image We should ignore (not parse) images that has non-crtool format, that images has no magic number (RAW_IMAGE_MAGIC equals 0). nftables images has format compatible with `nft -f /proc/self/fd/0` input format. Reported-by: Mr Jenkins Signed-off-by: Alexander Mikhalitsyn (Virtuozzo) --- test/crit-recode.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/crit-recode.py b/test/crit-recode.py index a7dcc72729..adaf337336 100755 --- a/test/crit-recode.py +++ b/test/crit-recode.py @@ -47,6 +47,8 @@ def recode_and_check(imgf, o_img, pretty): continue if imgf_b.startswith(b'ip6tables-'): continue + if imgf_b.startswith(b'nftables-'): + continue if imgf_b.startswith(b'route-'): continue if imgf_b.startswith(b'route6-'): From e7c8ac1f83255a3326f664a61334c5a06459a319 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 5 Mar 2020 20:46:16 +0200 Subject: [PATCH 246/277] travis: add ppc64-cross test on amd64 Signed-off-by: Mike Rapoport --- .travis.yml | 4 +++ scripts/build/Dockerfile.ppc64-cross | 45 ++++++++++++++++++++++++++++ scripts/build/Makefile | 2 +- 3 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 scripts/build/Dockerfile.ppc64-cross diff --git a/.travis.yml b/.travis.yml index ffa82f15f6..9928f16c24 100644 --- a/.travis.yml +++ b/.travis.yml @@ -90,6 +90,10 @@ jobs: arch: amd64 env: TR_ARCH=aarch64-cross dist: bionic + - os: linux + arch: amd64 + env: TR_ARCH=ppc64-cross + dist: bionic allow_failures: - env: TR_ARCH=docker-test - env: TR_ARCH=docker-test DIST=xenial diff --git a/scripts/build/Dockerfile.ppc64-cross b/scripts/build/Dockerfile.ppc64-cross new file mode 100644 index 0000000000..44061c558c --- /dev/null +++ 
b/scripts/build/Dockerfile.ppc64-cross @@ -0,0 +1,45 @@ +FROM dockcross/base:latest + +# Add the cross compiler sources +RUN echo "deb http://ftp.us.debian.org/debian/ buster main" >> /etc/apt/sources.list && \ + dpkg --add-architecture ppc64el && \ + apt-get install emdebian-archive-keyring + +RUN apt-get update && apt-get install -y \ + crossbuild-essential-ppc64el \ + libc6-dev-ppc64el-cross \ + libc6-ppc64el-cross \ + libbz2-dev:ppc64el \ + libexpat1-dev:ppc64el \ + ncurses-dev:ppc64el \ + libssl-dev:ppc64el \ + protobuf-c-compiler \ + protobuf-compiler \ + python-protobuf \ + libnl-3-dev:ppc64el \ + libprotobuf-dev:ppc64el \ + libnet-dev:ppc64el \ + libprotobuf-c-dev:ppc64el \ + libcap-dev:ppc64el \ + libaio-dev:ppc64el \ + libnl-route-3-dev:ppc64el + +ENV CROSS_TRIPLE=powerpc64le-linux-gnu +ENV CROSS_COMPILE=${CROSS_TRIPLE}- \ + CROSS_ROOT=/usr/${CROSS_TRIPLE} \ + AS=/usr/bin/${CROSS_TRIPLE}-as \ + AR=/usr/bin/${CROSS_TRIPLE}-ar \ + CC=/usr/bin/${CROSS_TRIPLE}-gcc \ + CPP=/usr/bin/${CROSS_TRIPLE}-cpp \ + CXX=/usr/bin/${CROSS_TRIPLE}-g++ \ + LD=/usr/bin/${CROSS_TRIPLE}-ld \ + FC=/usr/bin/${CROSS_TRIPLE}-gfortran + +ENV PATH="${PATH}:${CROSS_ROOT}/bin" \ + PKG_CONFIG_PATH=/usr/lib/${CROSS_TRIPLE}/pkgconfig \ + ARCH=ppc64 + +COPY . 
/criu +WORKDIR /criu + +RUN make mrproper && date && make -j $(nproc) zdtm && date diff --git a/scripts/build/Makefile b/scripts/build/Makefile index 913a86d6c2..855539152f 100644 --- a/scripts/build/Makefile +++ b/scripts/build/Makefile @@ -2,7 +2,7 @@ ARCHES := x86_64 fedora-asan fedora-rawhide centos armv7hf TARGETS := $(ARCHES) alpine TARGETS_CLANG := $(addsuffix $(TARGETS),-clang) CONTAINER_RUNTIME := docker -TARGETS += armv7-cross aarch64-cross +TARGETS += armv7-cross aarch64-cross ppc64-cross all: $(TARGETS) $(TARGETS_CLANG) .PHONY: all From e0fca8ba7a165c178d2cfb50ff31c24fef093616 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 21 Mar 2020 10:35:42 -0700 Subject: [PATCH 247/277] mailmap: update my email Signed-off-by: Andrei Vagin --- .mailmap | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.mailmap b/.mailmap index d8c3f594d1..6f046b972d 100644 --- a/.mailmap +++ b/.mailmap @@ -1,6 +1,8 @@ Stanislav Kinsbursky Pavel Emelyanov -Andrey Vagin -Andrey Vagin -Andrey Vagin Andrew Vagin +Andrei Vagin +Andrei Vagin +Andrei Vagin +Andrei Vagin +Andrei Vagin Cyrill Gorcunov From 9433b7b9db3ed9e27714b311cdf7b601fe51f5af Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Fri, 20 Mar 2020 23:12:59 +0000 Subject: [PATCH 248/277] make: use cflags/ldflags for config.h detection mechanism The config.h detection scripts should use the provided CFLAGS/LDFLAGS as it tries to link libnl, libnet, and others. 
Signed-off-by: Nicolas Viennot --- scripts/nmk/scripts/utils.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/nmk/scripts/utils.mk b/scripts/nmk/scripts/utils.mk index 0cf216bc06..b9790615ca 100644 --- a/scripts/nmk/scripts/utils.mk +++ b/scripts/nmk/scripts/utils.mk @@ -3,7 +3,7 @@ ifndef ____nmk_defined__utils # # Usage: option := $(call try-compile,language,source-to-build,cc-options,cc-defines) try-compile = $(shell sh -c 'echo "$(2)" | \ - $(CC) $(4) -x $(1) - $(3) -o /dev/null > /dev/null 2>&1 && \ + $(CC) $(CFLAGS) $(LDFLAGS) $(4) -x $(1) - $(3) -o /dev/null > /dev/null 2>&1 && \ echo true || echo false') # From 23342610c6b34de28a46cc72db377a4136cdf5b4 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sun, 15 Mar 2020 10:44:14 +0300 Subject: [PATCH 249/277] mem: dump shared memory file descriptors Any shared memory mapping can be opened via /proc/self/map_files/. Such file descriptors look like memfd file descriptors, so they can be dumped in the same way.
Signed-off-by: Andrei Vagin --- criu/files.c | 2 +- criu/include/memfd.h | 2 +- criu/memfd.c | 11 +++++++---- criu/proc_parse.c | 38 ++++++++++---------------------------- 4 files changed, 19 insertions(+), 34 deletions(-) diff --git a/criu/files.c b/criu/files.c index f6ba39a306..a1fd267642 100644 --- a/criu/files.c +++ b/criu/files.c @@ -552,7 +552,7 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts, p.link = &link; - if (is_memfd(p.stat.st_dev, &link.name[1])) + if (is_memfd(p.stat.st_dev)) ops = &memfd_dump_ops; else if (link.name[1] == '/') ops = ®file_dump_ops; diff --git a/criu/include/memfd.h b/criu/include/memfd.h index 2d8eda5458..4189766fdc 100644 --- a/criu/include/memfd.h +++ b/criu/include/memfd.h @@ -8,7 +8,7 @@ struct fd_parms; struct file_desc; -extern int is_memfd(dev_t dev, const char *path); +extern int is_memfd(dev_t dev); extern int dump_one_memfd_cond(int lfd, u32 *id, struct fd_parms *parms); extern const struct fdtype_ops memfd_dump_ops; diff --git a/criu/memfd.c b/criu/memfd.c index 983e01b388..bca6900cb9 100644 --- a/criu/memfd.c +++ b/criu/memfd.c @@ -58,15 +58,14 @@ static LIST_HEAD(memfd_inodes); static u32 memfd_inode_ids = 1; -int is_memfd(dev_t dev, const char *path) +int is_memfd(dev_t dev) { /* * TODO When MAP_HUGETLB is used, the file device is not shmem_dev, * Note that other parts of CRIU have similar issues, see * is_anon_shmem_map(). */ - return dev == kdat.shmem_dev && - !strncmp(path, MEMFD_PREFIX, MEMFD_PREFIX_LEN); + return dev == kdat.shmem_dev; } static int dump_memfd_inode(int fd, struct memfd_inode *inode, @@ -167,7 +166,11 @@ static int dump_one_memfd(int lfd, u32 id, const struct fd_parms *p) link = p->link; strip_deleted(link); - name = &link->name[1+MEMFD_PREFIX_LEN]; + /* link->name is always started with "." which has to be skipped. 
*/ + if (strncmp(link->name + 1, MEMFD_PREFIX, MEMFD_PREFIX_LEN) == 0) + name = &link->name[1 + MEMFD_PREFIX_LEN]; + else + name = link->name + 1; inode = dump_unique_memfd_inode(lfd, name, &p->stat); if (!inode) diff --git a/criu/proc_parse.c b/criu/proc_parse.c index 468afcdf38..980342870b 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -305,7 +305,7 @@ static int vma_get_mapfile_user(const char *fname, struct vma_area *vma, vfi_dev = makedev(vfi->dev_maj, vfi->dev_min); - if (is_memfd(vfi_dev, fname)) { + if (is_memfd(vfi_dev)) { struct fd_link link; link.len = strlen(fname); strlcpy(link.name, fname, sizeof(link.name)); @@ -596,39 +596,21 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area, goto err; } - /* - * /dev/zero stands for anon-shared mapping - * otherwise it's some file mapping. - * - * We treat memfd mappings as regular file mappings because - * their backing can be seen as files, which is easy to - * support. So even though memfd is an anonymous shmem, we - * treat it differently. - * Note: maybe we should revisit this as /proc/map_files/ - * may not always be accessible. 
- */ - - if (is_memfd(st_buf->st_dev, file_path)) { - vma_area->e->status |= VMA_AREA_MEMFD; - goto normal_file; - } - - if (is_anon_shmem_map(st_buf->st_dev)) { - if (!(vma_area->e->flags & MAP_SHARED)) - goto err_bogus_mapping; + if (is_anon_shmem_map(st_buf->st_dev) && !strncmp(file_path, "/SYSV", 5)) { vma_area->e->flags |= MAP_ANONYMOUS; vma_area->e->status |= VMA_ANON_SHARED; vma_area->e->shmid = st_buf->st_ino; - - if (!strncmp(file_path, "/SYSV", 5)) { - pr_info("path: %s\n", file_path); - vma_area->e->status |= VMA_AREA_SYSVIPC; - } else { + if (!(vma_area->e->flags & MAP_SHARED)) + goto err_bogus_mapping; + pr_info("path: %s\n", file_path); + vma_area->e->status |= VMA_AREA_SYSVIPC; + } else { + if (is_anon_shmem_map(st_buf->st_dev)) { + vma_area->e->status |= VMA_AREA_MEMFD; if (fault_injected(FI_HUGE_ANON_SHMEM_ID)) vma_area->e->shmid += FI_HUGE_ANON_SHMEM_ID_BASE; } - } else { -normal_file: + if (vma_area->e->flags & MAP_PRIVATE) vma_area->e->status |= VMA_FILE_PRIVATE; else From 27c6039f76f5417788531e4515afe7990258173d Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 19 Mar 2020 09:37:18 +0300 Subject: [PATCH 250/277] mem/vma: set VMA_FILE_{PRIVATE,SHARED} if a vma file is borrowed Here is a fast path when two consequent vma-s share the same file. But one of these vma-s can map a file with MAP_SHARED, but another one can map it with MAP_PRIVATE and we need to take this into account. 
--- criu/proc_parse.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/criu/proc_parse.c b/criu/proc_parse.c index 980342870b..60aba87887 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -584,6 +584,14 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area, vma_area->e->shmid = prev->e->shmid; vma_area->vmst = prev->vmst; vma_area->mnt_id = prev->mnt_id; + + if (!(vma_area->e->status & VMA_AREA_SYSVIPC)) { + vma_area->e->status &= ~(VMA_FILE_PRIVATE | VMA_FILE_SHARED); + if (vma_area->e->flags & MAP_PRIVATE) + vma_area->e->status |= VMA_FILE_PRIVATE; + else + vma_area->e->status |= VMA_FILE_SHARED; + } } else if (*vm_file_fd >= 0) { struct stat *st_buf = vma_area->vmst; From 941d31b332e2e38ea7146996e1235089f16c98ac Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sun, 15 Mar 2020 10:53:51 +0300 Subject: [PATCH 251/277] test/zdtmp: add a test to C/R shared memory file descriptors Any shared memory region can be opened via /proc/self/map_files. Signed-off-by: Andrei Vagin --- test/zdtm/static/Makefile | 2 + test/zdtm/static/shmemfd-priv.c | 84 ++++++++++++++++++++++ test/zdtm/static/shmemfd-priv.desc | 1 + test/zdtm/static/shmemfd.c | 107 +++++++++++++++++++++++++++++ test/zdtm/static/shmemfd.desc | 1 + 5 files changed, 195 insertions(+) create mode 100644 test/zdtm/static/shmemfd-priv.c create mode 100644 test/zdtm/static/shmemfd-priv.desc create mode 100644 test/zdtm/static/shmemfd.c create mode 100644 test/zdtm/static/shmemfd.desc diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index ee69612c77..a8e4107d3b 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -224,6 +224,8 @@ TST_NOFILE := \ memfd01 \ memfd02 \ memfd03 \ + shmemfd \ + shmemfd-priv \ # jobctl00 \ ifneq ($(ARCH),arm) diff --git a/test/zdtm/static/shmemfd-priv.c b/test/zdtm/static/shmemfd-priv.c new file mode 100644 index 0000000000..bbdb46905b --- /dev/null +++ b/test/zdtm/static/shmemfd-priv.c @@ -0,0 +1,84 @@ +#include +#include +
+#include +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "Test C/R of shared memory file descriptors"; +const char *test_author = "Andrei Vagin "; + +#define err(exitcode, msg, ...) ({ pr_perror(msg, ##__VA_ARGS__); exit(exitcode); }) + +int main(int argc, char *argv[]) +{ + void *addr, *priv_addr, *addr2; + char path[4096]; + int fd; + + test_init(argc, argv); + + addr = mmap(NULL, 5 * PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); + if (addr == MAP_FAILED) { + pr_perror("mmap"); + return 1; + } + + *(int *) addr = 1; + *(int *) (addr + PAGE_SIZE) = 11; + *(int *) (addr + 2 * PAGE_SIZE) = 111; + + snprintf(path, sizeof(path), "/proc/self/map_files/%lx-%lx", + (long)addr, (long)addr + 5 * PAGE_SIZE); + fd = open(path, O_RDWR | O_LARGEFILE); + if (fd < 0) + err(1, "Can't open %s", path); + + priv_addr = mmap(NULL, 5 * PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_FILE | MAP_PRIVATE, fd, PAGE_SIZE); + if (priv_addr == MAP_FAILED) { + pr_perror("mmap"); + return 1; + } + + addr2 = mmap(NULL, 5 * PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_FILE | MAP_SHARED, fd, 2 * PAGE_SIZE); + if (addr2 == MAP_FAILED) { + pr_perror("mmap"); + return 1; + } + + *(int *) (priv_addr + PAGE_SIZE) = 22; + + test_daemon(); + test_waitsig(); + + if (*(int *) (priv_addr + PAGE_SIZE) != 22) { + fail("the second page of the private mapping is corrupted"); + return 1; + } + if (*(int *) (priv_addr) != 11) { + fail("the first page of the private mapping is corrupted"); + return 1; + } + if (*(int *) (addr2) != 111) { + fail("the first page of the second shared mapping is corrupted"); + return 1; + } + *(int *) (addr2) = 333; + if (*(int *) (addr + 2 * PAGE_SIZE) != 333) { + fail("the first page of the second shared mapping isn't shared"); + return 1; + } + *(int *) (addr + 3 * PAGE_SIZE) = 444; + if (*(int *) (priv_addr + 2 * PAGE_SIZE) != 444) { + fail("the third page of the private mapping is corrupted"); + return 1; + } + + pass(); + + 
return 0; +} diff --git a/test/zdtm/static/shmemfd-priv.desc b/test/zdtm/static/shmemfd-priv.desc new file mode 100644 index 0000000000..d969725f6d --- /dev/null +++ b/test/zdtm/static/shmemfd-priv.desc @@ -0,0 +1 @@ +{'flavor': 'h ns', 'flags': 'suid'} diff --git a/test/zdtm/static/shmemfd.c b/test/zdtm/static/shmemfd.c new file mode 100644 index 0000000000..b65faa2e11 --- /dev/null +++ b/test/zdtm/static/shmemfd.c @@ -0,0 +1,107 @@ +#include +#include + +#include +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "Test C/R of shared memory file descriptors"; +const char *test_author = "Andrei Vagin "; + +#define err(exitcode, msg, ...) ({ pr_perror(msg, ##__VA_ARGS__); exit(exitcode); }) + +int main(int argc, char *argv[]) +{ + int fd, fl_flags1, fl_flags2, fd_flags1, fd_flags2; + struct statfs statfs1, statfs2; + off_t pos1, pos2; + char path[4096]; + char buf[5]; + void *addr; + + test_init(argc, argv); + + addr = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); + if (addr == MAP_FAILED) { + pr_perror("mmap"); + return 1; + } + + snprintf(path, sizeof(path), "/proc/self/map_files/%lx-%lx", + (long)addr, (long)addr + PAGE_SIZE); + fd = open(path, O_RDWR | O_LARGEFILE); + if (fd < 0) + err(1, "Can't open %s", path); + ftruncate(fd, 0); + munmap(addr, PAGE_SIZE); + + if (fcntl(fd, F_SETFL, O_APPEND) < 0) + err(1, "Can't get fl flags"); + + if ((fl_flags1 = fcntl(fd, F_GETFL)) == -1) + err(1, "Can't get fl flags"); + + if ((fd_flags1 = fcntl(fd, F_GETFD)) == -1) + err(1, "Can't get fd flags"); + + if (fstatfs(fd, &statfs1) < 0) + err(1, "statfs issue"); + + if (write(fd, "hello", 5) != 5) + err(1, "write error"); + + pos1 = 3; + if (lseek(fd, pos1, SEEK_SET) < 0) + err(1, "seek error"); + + test_daemon(); + test_waitsig(); + + if ((fl_flags2 = fcntl(fd, F_GETFL)) == -1) + err(1, "Can't get fl flags"); + + if (fl_flags1 != fl_flags2) { + fail("fl flags differs %x %x", fl_flags1, fl_flags2); + 
return 1; + } + + if ((fd_flags2 = fcntl(fd, F_GETFD)) == -1) + err(1, "Can't get fd flags"); + + if (fd_flags1 != fd_flags2) { + fail("fd flags differs"); + return 1; + } + + if (fstatfs(fd, &statfs2) < 0) + err(1, "statfs issue"); + + if (statfs1.f_type != statfs2.f_type) { + fail("statfs.f_type differs"); + return 1; + } + + pos2 = lseek(fd, 0, SEEK_CUR); + if (pos1 != pos2) { + fail("position differs"); + return 1; + } + + if (pread(fd, buf, sizeof(buf), 0) != sizeof(buf)) { + fail("read problem"); + return 1; + } + + if (memcmp(buf, "hello", sizeof(buf))) { + fail("content mismatch"); + return 1; + } + + pass(); + + return 0; +} diff --git a/test/zdtm/static/shmemfd.desc b/test/zdtm/static/shmemfd.desc new file mode 100644 index 0000000000..d969725f6d --- /dev/null +++ b/test/zdtm/static/shmemfd.desc @@ -0,0 +1 @@ +{'flavor': 'h ns', 'flags': 'suid'} From 419ccc90a6c81102fb3806c703b1773c49e7b7a5 Mon Sep 17 00:00:00 2001 From: Valeriy Vdovin Date: Mon, 3 Feb 2020 15:08:26 +0300 Subject: [PATCH 252/277] zdtm: Implemented get_current_dir_name wrapper that checks for 'x' permissions Any filesystem syscall, that needs to navigate to inode by it's absolute path performs successive lookup operations for each part of the path. Lookup operation includes access rights check. Usually but not always zdtm tests processes fall under 'other' access category. Also, usually directories don't have 'x' bit set for other. In case when bit 'x' is not set and user-ID and group-ID of a process relate it to 'other', test's will not succeed in performing these syscalls which are most of filesystem api, that has const char *path as part of it arguments (open, openat, mkdir, bind, etc). The observable behavior of that is that zdtm tests fail at file creation ops on one system and pass on the other. The above is not immediately clear to the developer by just looking at failed test's logs. 
Investigation of that is also not quick for a developer due to the complex structure of zdtm runtime where nested clones with NAMESPACE flags take place alongside with bind-mounts. As an additional note: 'get_current_dir_name' is documented as returning EACCESS in case when some part of the path lacks read/list permissions. But in fact it's not always so. Practice shows, that test processes can get false success on this operation only to fail on later call to something like mkdir/mknod/bind with a given path in arguments. 'get_cwd_check_perm' is a wrapper around 'get_current_dir_name'. It also checks for permissions on the given filepath and logs the error. This directs the developer towards the right investigation path or even eliminates the need for investigation completely. Signed-off-by: Valeriy Vdovin --- test/zdtm/lib/fs.c | 24 ++++++++++++++++++++++++ test/zdtm/lib/fs.h | 24 ++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/test/zdtm/lib/fs.c b/test/zdtm/lib/fs.c index 0decfc37b7..e82011ec86 100644 --- a/test/zdtm/lib/fs.c +++ b/test/zdtm/lib/fs.c @@ -94,3 +94,27 @@ mnt_info_t *get_cwd_mnt_info(void) mnt_info_free(&m); goto out; } + +int get_cwd_check_perm(char **result) +{ + char *cwd; + *result = 0; + cwd = get_current_dir_name(); + if (!cwd) { + pr_perror("failed to get current directory"); + return -1; + } + + if (access(cwd, X_OK)) { + pr_err("access check for bit X for current dir path '%s' " + "failed for uid:%d,gid:%d, error: %d(%s). 
" + "Bit 'x' should be set in all path components of " + "this directory\n", + cwd, getuid(), getgid(), errno, strerror(errno) + ); + return -1; + } + + *result = cwd; + return 0; +} diff --git a/test/zdtm/lib/fs.h b/test/zdtm/lib/fs.h index 972b15abad..af7a665fb3 100644 --- a/test/zdtm/lib/fs.h +++ b/test/zdtm/lib/fs.h @@ -50,4 +50,28 @@ extern mnt_info_t *mnt_info_alloc(void); extern void mnt_info_free(mnt_info_t **m); extern mnt_info_t *get_cwd_mnt_info(void); +/* + * get_cwd_check_perm is called to check that cwd is actually usable for a calling + * process. + * + * Example output of a stat command on a '/root' path shows file access bits: + * > stat /root + * File: ‘/root’ + * ... + * Access: (0550/dr-xr-x---) Uid: ( 0/root) Gid: ( 0/root) + * ^- no 'x' bit for other + * + * Here we can see that '/root' dir (that often can be part of cwd path) does not + * allow non-root user and non-root group to list contents of this directory. + * Calling process matching 'other' access category may succeed getting cwd path, but will + * fail performing further filesystem operations based on this path with confusing errors. + * + * This function calls get_current_dir_name and explicitly checks that bit 'x' is enabled for + * a calling process and logs the error. + * + * If check passes, stores get_current_dir's result in *result and returns 0 + * If check fails, stores 0 in *result and returns -1 + */ +extern int get_cwd_check_perm(char **result); + #endif /* ZDTM_FS_H_ */ From f883e92e04706bd2658dd9734aacfdb746265ef0 Mon Sep 17 00:00:00 2001 From: Valeriy Vdovin Date: Mon, 3 Feb 2020 15:27:40 +0300 Subject: [PATCH 253/277] zdtm: Use safe helper function to initialize unix socket sockaddr structure The helper function removes code duplication from tests that want to initialize unix socket address to an absolute file path, derived from current working directory of the test + relative filename of a resulting socket. 
Because the former code used cwd = get_current_dir_name() as part of absolute filename generation, the resulting filepath could later cause failure of bind syscall due to unchecked permissions and introduce confusing permission errors. Signed-off-by: Valeriy Vdovin --- test/zdtm/lib/Makefile | 2 +- test/zdtm/lib/unix.c | 19 ++++++++++++++++ test/zdtm/lib/zdtmtst.h | 3 +++ test/zdtm/static/del_standalone_un.c | 17 +------------- test/zdtm/static/deleted_unix_sock.c | 19 ++-------------- test/zdtm/static/sk-unix01.c | 33 ++++++---------------------- 6 files changed, 33 insertions(+), 60 deletions(-) create mode 100644 test/zdtm/lib/unix.c diff --git a/test/zdtm/lib/Makefile b/test/zdtm/lib/Makefile index b87f36e8f2..89ca909332 100644 --- a/test/zdtm/lib/Makefile +++ b/test/zdtm/lib/Makefile @@ -4,7 +4,7 @@ CFLAGS += $(USERCFLAGS) LIB := libzdtmtst.a -LIBSRC := datagen.c msg.c parseargs.c test.c streamutil.c lock.c ns.c tcp.c fs.c sysctl.c +LIBSRC := datagen.c msg.c parseargs.c test.c streamutil.c lock.c ns.c tcp.c unix.c fs.c sysctl.c LIBOBJ := $(LIBSRC:%.c=%.o) BIN := groups diff --git a/test/zdtm/lib/unix.c b/test/zdtm/lib/unix.c new file mode 100644 index 0000000000..c36846cadd --- /dev/null +++ b/test/zdtm/lib/unix.c @@ -0,0 +1,19 @@ +#include +#include +#include "zdtmtst.h" +#include "fs.h" + +int unix_fill_sock_name(struct sockaddr_un *name, char *relFilename) +{ + char *cwd; + + if (get_cwd_check_perm(&cwd)) { + pr_err("failed to get current working directory with valid permissions.\n"); + return -1; + } + + name->sun_family = AF_LOCAL; + ssprintf(name->sun_path, "%s/%s", cwd, relFilename); + return 0; +} + diff --git a/test/zdtm/lib/zdtmtst.h b/test/zdtm/lib/zdtmtst.h index 2cd4bdd1dd..6eec266475 100644 --- a/test/zdtm/lib/zdtmtst.h +++ b/test/zdtm/lib/zdtmtst.h @@ -149,6 +149,9 @@ extern int tcp_init_server(int family, int *port); extern int tcp_accept_server(int sock); extern int tcp_init_client(int family, char *servIP, unsigned short servPort); +struct
sockaddr_un; +extern int unix_fill_sock_name(struct sockaddr_un *name, char *relFilename); + struct zdtm_tcp_opts { bool reuseaddr; bool reuseport; diff --git a/test/zdtm/static/del_standalone_un.c b/test/zdtm/static/del_standalone_un.c index d8200068be..5426fc7865 100644 --- a/test/zdtm/static/del_standalone_un.c +++ b/test/zdtm/static/del_standalone_un.c @@ -16,19 +16,6 @@ const char *test_author = "Tycho Andersen "; char *dirname; TEST_OPTION(dirname, string, "directory name", 1); -static int fill_sock_name(struct sockaddr_un *name, const char *filename) -{ - char *cwd; - - cwd = get_current_dir_name(); - if (strlen(filename) + strlen(cwd) + 1 >= sizeof(name->sun_path)) - return -1; - - name->sun_family = AF_LOCAL; - ssprintf(name->sun_path, "%s/%s", cwd, filename); - return 0; -} - static int bind_and_listen(struct sockaddr_un *addr) { int sk; @@ -71,10 +58,8 @@ int main(int argc, char **argv) goto out; } - if (fill_sock_name(&addr, filename) < 0) { - pr_err("filename \"%s\" is too long\n", filename); + if (unix_fill_sock_name(&addr, filename)) goto out; - } sk1 = bind_and_listen(&addr); if (sk1 < 0) diff --git a/test/zdtm/static/deleted_unix_sock.c b/test/zdtm/static/deleted_unix_sock.c index bcc33f3dec..4d328e9966 100644 --- a/test/zdtm/static/deleted_unix_sock.c +++ b/test/zdtm/static/deleted_unix_sock.c @@ -17,28 +17,13 @@ const char *test_author = "Roman Kagan "; char *filename; TEST_OPTION(filename, string, "file name", 1); -static int fill_sock_name(struct sockaddr_un *name, const char *filename) -{ - char *cwd; - - cwd = get_current_dir_name(); - if (strlen(filename) + strlen(cwd) + 1 >= sizeof(name->sun_path)) - return -1; - - name->sun_family = AF_LOCAL; - sprintf(name->sun_path, "%s/%s", cwd, filename); - return 0; -} - static int setup_srv_sock(void) { struct sockaddr_un name; int sock; - if (fill_sock_name(&name, filename) < 0) { - pr_perror("filename \"%s\" is too long", filename); + if (unix_fill_sock_name(&name, filename)) return -1; - } sock = 
socket(PF_LOCAL, SOCK_STREAM, 0); if (sock < 0) { @@ -67,7 +52,7 @@ static int setup_clnt_sock(void) struct sockaddr_un name; int sock; - if (fill_sock_name(&name, filename) < 0) + if (unix_fill_sock_name(&name, filename)) return -1; sock = socket(PF_LOCAL, SOCK_STREAM, 0); diff --git a/test/zdtm/static/sk-unix01.c b/test/zdtm/static/sk-unix01.c index 2bceef79a7..0e9006a152 100644 --- a/test/zdtm/static/sk-unix01.c +++ b/test/zdtm/static/sk-unix01.c @@ -24,22 +24,6 @@ const char *test_author = "Cyrill Gorcunov "; char *dirname; TEST_OPTION(dirname, string, "directory name", 1); -static int fill_sock_name(struct sockaddr_un *name, const char *filename) -{ - char *cwd; - - cwd = get_current_dir_name(); - if (strlen(filename) + strlen(cwd) + 1 >= sizeof(name->sun_path)) { - pr_err("Name %s/%s is too long for socket\n", - cwd, filename); - return -1; - } - - name->sun_family = AF_LOCAL; - ssprintf(name->sun_path, "%s/%s", cwd, filename); - return 0; -} - static int sk_alloc_bind(int type, struct sockaddr_un *addr) { int sk; @@ -155,10 +139,9 @@ int main(int argc, char **argv) */ ssprintf(filename, "%s/%s", subdir_dg, "sk-dt"); - if (fill_sock_name(&addr, filename) < 0) { - pr_err("%s is too long for socket\n", filename); + if (unix_fill_sock_name(&addr, filename)) return 1; - } + unlink(addr.sun_path); sk_dgram[0] = sk_alloc_bind(SOCK_DGRAM, &addr); @@ -184,10 +167,9 @@ int main(int argc, char **argv) test_msg("sk-dt: alloc/connect/unlink %d %s\n", sk_dgram[3], addr.sun_path); ssprintf(filename, "%s/%s", dirname, "sole"); - if (fill_sock_name(&addr, filename) < 0) { - pr_err("%s is too long for socket\n", filename); + if (unix_fill_sock_name(&addr, filename)) return 1; - } + unlink(addr.sun_path); sk_dgram[4] = sk_alloc_bind(SOCK_DGRAM, &addr); @@ -237,7 +219,7 @@ int main(int argc, char **argv) sk_dgram_pair[0], sk_dgram_pair[1]); ssprintf(filename, "%s/%s", subdir_dg, "sk-dtp"); - if (fill_sock_name(&addr, filename) < 0) { + if (unix_fill_sock_name(&addr, filename)) 
{ pr_err("%s is too long for socket\n", filename); return 1; } @@ -270,10 +252,9 @@ int main(int argc, char **argv) * - delete socket on fs */ ssprintf(filename, "%s/%s", subdir_st, "sk-st"); - if (fill_sock_name(&addr, filename) < 0) { - pr_err("%s is too long for socket\n", filename); + if (unix_fill_sock_name(&addr, filename)) return 1; - } + unlink(addr.sun_path); sk_st[0] = sk_alloc_bind(SOCK_STREAM, &addr); From 49cc6cecd18beb5fcb699636960320901096078a Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 21 Mar 2020 21:58:55 +0300 Subject: [PATCH 254/277] pipe: restore pipe size even if a pipe is empty Without this patch, pipe size is restored only if a pipe has queued data. Reported-by: Mr Jenkins Signed-off-by: Andrei Vagin --- criu/pipes.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/criu/pipes.c b/criu/pipes.c index cb5da71de4..d74329161b 100644 --- a/criu/pipes.c +++ b/criu/pipes.c @@ -160,24 +160,24 @@ int restore_pipe_data(int img_type, int pfd, u32 id, struct pipe_data_rst **hash return 0; } - if (!pd->pde->bytes) - goto out; - - if (!pd->data) { - pr_err("Double data restore occurred on %#x\n", id); - return -1; - } - if (pd->pde->has_size) { pr_info("Restoring size %#x for %#x\n", pd->pde->size, pd->pde->pipe_id); ret = fcntl(pfd, F_SETPIPE_SZ, pd->pde->size); if (ret < 0) { pr_perror("Can't restore pipe size"); - goto err; + return -1; } } + if (!pd->pde->bytes) + return 0; + + if (!pd->data) { + pr_err("Double data restore occurred on %#x\n", id); + return -1; + } + iov.iov_base = pd->data; iov.iov_len = pd->pde->bytes; @@ -185,14 +185,13 @@ int restore_pipe_data(int img_type, int pfd, u32 id, struct pipe_data_rst **hash ret = vmsplice(pfd, &iov, 1, SPLICE_F_GIFT | SPLICE_F_NONBLOCK); if (ret < 0) { pr_perror("%#x: Error splicing data", id); - goto err; + return -1; } if (ret == 0 || ret > iov.iov_len /* sanity */) { pr_err("%#x: Wanted to restore %zu bytes, but got %d\n", id, iov.iov_len, 
ret); - ret = -1; - goto err; + return -1; } iov.iov_base += ret; @@ -211,10 +210,7 @@ int restore_pipe_data(int img_type, int pfd, u32 id, struct pipe_data_rst **hash munmap(pd->data, pd->pde->bytes); pd->data = NULL; -out: - ret = 0; -err: - return ret; + return 0; } static int userns_reopen(void *_arg, int fd, pid_t pid) From b00267eaf32753ff8dea9cd6a8011ef4cbc0f333 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 21 Mar 2020 22:08:38 +0300 Subject: [PATCH 255/277] test/pipe03: check that pipe size is restored Create two pipes with and without queued data. Signed-off-by: Andrei Vagin --- test/zdtm/static/pipe03.c | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/test/zdtm/static/pipe03.c b/test/zdtm/static/pipe03.c index a8721e934f..d649007b70 100644 --- a/test/zdtm/static/pipe03.c +++ b/test/zdtm/static/pipe03.c @@ -13,27 +13,28 @@ const char *test_author = "Andrei Vagin "; int main(int argc, char **argv) { - int p[2], i; + int p[2][2], i; uint8_t buf[BUF_SIZE]; uint32_t crc; test_init(argc, argv); - if (pipe2(p, O_NONBLOCK)) { - pr_perror("pipe"); - return 1; - } - - if (fcntl(p[1], F_SETPIPE_SZ, DATA_SIZE) == -1) { - pr_perror("Unable to change a pipe size"); - return 1; + for (i = 0; i < 2; i++) { + if (pipe2(p[i], O_NONBLOCK)) { + pr_perror("pipe"); + return 1; + } + if (fcntl(p[i][1], F_SETPIPE_SZ, DATA_SIZE) == -1) { + pr_perror("Unable to change a pipe size"); + return 1; + } } crc = ~0; datagen(buf, BUF_SIZE, &crc); for (i = 0; i < DATA_SIZE / BUF_SIZE; i++) { - if (write(p[1], buf, BUF_SIZE) != BUF_SIZE) { + if (write(p[0][1], buf, BUF_SIZE) != BUF_SIZE) { pr_perror("write"); return 1; } @@ -43,12 +44,26 @@ int main(int argc, char **argv) test_waitsig(); for (i = 0; i < DATA_SIZE / BUF_SIZE; i++) { - if (read(p[0], buf, BUF_SIZE) != BUF_SIZE) { + if (read(p[0][0], buf, BUF_SIZE) != BUF_SIZE) { pr_perror("read"); return 1; } } + for (i = 0; i < 2; i++) { + int size; + + size = fcntl(p[i][1], 
F_GETPIPE_SZ); + if (size < 0) { + pr_perror("Unable to get a pipe size"); + return 1; + } + if (size != DATA_SIZE) { + fail("%d: size %d expected %d", i, size, DATA_SIZE); + return 1; + } + } + pass(); return 0; } From d2b99a24417899b430f2b54f05bd9fdf52d0a21a Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 21 Mar 2020 22:11:28 +0300 Subject: [PATCH 256/277] test/fifo_loop: change sizes of all fifo-s to fit a test buffer This test doesn't expect that the write operation will block. Signed-off-by: Andrei Vagin --- test/zdtm/transition/fifo_loop.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/test/zdtm/transition/fifo_loop.c b/test/zdtm/transition/fifo_loop.c index b028c2fd5f..b065925867 100644 --- a/test/zdtm/transition/fifo_loop.c +++ b/test/zdtm/transition/fifo_loop.c @@ -84,6 +84,14 @@ int main(int argc, char **argv) ret = errno; return ret; } + + pipe_size = fcntl(writefd, F_SETPIPE_SZ, sizeof(buf)); + if (pipe_size != sizeof(buf)) { + pr_perror("fcntl(writefd, F_SETPIPE_SZ) -> %d", pipe_size); + kill(0, SIGKILL); + exit(1); + } + signal(SIGPIPE, SIG_IGN); if (pipe_in2out(readfd, writefd, buf, sizeof(buf)) < 0) /* pass errno as exit code to the parent */ @@ -107,7 +115,7 @@ int main(int argc, char **argv) pipe_size = fcntl(writefd, F_SETPIPE_SZ, sizeof(buf)); if (pipe_size != sizeof(buf)) { - pr_perror("fcntl(writefd, F_GETPIPE_SZ) -> %d", pipe_size); + pr_perror("fcntl(writefd, F_SETPIPE_SZ) -> %d", pipe_size); kill(0, SIGKILL); exit(1); } From ff6c7d90ec006775eea6b303553972ca0e5ca647 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 5 Apr 2017 19:28:11 +0300 Subject: [PATCH 257/277] vz7: sockets: Separate socket buffer size setting into a helper It is known that for unix sockets we need to setup qlen first before restoring queue itself, otherwise there might be not enough place for data. Thus move the snippet into separate helper. 
Most likely we will merge it back again once it's been proved that for any kind of sockets it is safe to setup all options before queue data itself. https://jira.sw.ru/browse/PSBM-63762 Signed-off-by: Cyrill Gorcunov --- criu/include/sockets.h | 1 + criu/sockets.c | 16 +++++++++++----- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/criu/include/sockets.h b/criu/include/sockets.h index cd98d18e06..2117ab3e83 100644 --- a/criu/include/sockets.h +++ b/criu/include/sockets.h @@ -27,6 +27,7 @@ struct socket_desc { extern int dump_socket(struct fd_parms *p, int lfd, FdinfoEntry *); extern int dump_socket_opts(int sk, SkOptsEntry *soe); extern int restore_socket_opts(int sk, SkOptsEntry *soe); +extern int restore_socket_bufsz(int sk, SkOptsEntry *soe); extern void release_skopts(SkOptsEntry *); extern int restore_prepare_socket(int sk); extern void preload_socket_modules(void); diff --git a/criu/sockets.c b/criu/sockets.c index 2e1ce9d7bc..9f9ea49f3c 100644 --- a/criu/sockets.c +++ b/criu/sockets.c @@ -522,17 +522,23 @@ int restore_prepare_socket(int sk) return 0; } -int restore_socket_opts(int sk, SkOptsEntry *soe) +int restore_socket_bufsz(int sk, SkOptsEntry *soe) { - int ret = 0, val = 1; - struct timeval tv; /* In kernel a bufsize value is doubled. 
*/ - u32 bufs[2] = { soe->so_sndbuf / 2, soe->so_rcvbuf / 2}; + uint32_t bufs[2] = { soe->so_sndbuf / 2, soe->so_rcvbuf / 2}; pr_info("%d restore sndbuf %d rcv buf %d\n", sk, soe->so_sndbuf, soe->so_rcvbuf); /* setsockopt() multiplies the input values by 2 */ - ret |= userns_call(sk_setbufs, UNS_ASYNC, bufs, sizeof(bufs), sk); + return userns_call(sk_setbufs, UNS_ASYNC, bufs, sizeof(bufs), sk); +} + +int restore_socket_opts(int sk, SkOptsEntry *soe) +{ + int ret = 0, val; + struct timeval tv; + + ret |= restore_socket_bufsz(sk, soe); if (soe->has_so_priority) { pr_debug("\trestore priority %d for socket\n", soe->so_priority); From f42491ef8e3d9f0d4f605fe9acebfad05e2ebb84 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 14 Mar 2018 18:05:46 +0300 Subject: [PATCH 258/277] sk-unix -- Restore qlen before pushing queued data back The size of qlen may be bigger than one provided by kernel as default, so pushing data may lead to Error (criu/sk-queue.c:500): Failed to send packet: Resource temporarily unavailable ie to -EAGAIN. 
Signed-off-by: Cyrill Gorcunov Signed-off-by: Alexander Mikhalitsyn Signed-off-by: Alexander Mikhalitsyn --- criu/sk-unix.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/criu/sk-unix.c b/criu/sk-unix.c index 048ff44ae9..49a5df919a 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -1102,10 +1102,13 @@ static bool peer_is_not_prepared(struct unix_sk_info *peer) return (!peer->listen); } -static int restore_unix_queue(int fd, struct unix_sk_info *peer) +static int restore_unix_queue(int fd, SkOptsEntry *soe, struct unix_sk_info *peer) { struct pstree_item *task; + if (restore_socket_bufsz(fd, soe)) + return -1; + if (restore_sk_queue(fd, peer->ue->id)) return -1; if (peer->queuer) @@ -1335,7 +1338,7 @@ static int post_open_standalone(struct file_desc *d, int fd) restore_queue: if (peer->queuer == ui && !(peer->ue->uflags & USK_EXTERN) && - restore_unix_queue(fd, peer)) + restore_unix_queue(fd, ui->ue->opts, peer)) return -1; restore_sk_common: if (ui->queuer && !ui->queuer->peer_queue_restored) @@ -1637,10 +1640,10 @@ static int post_open_interconnected_master(struct unix_sk_info *ui) if (chk_restored_scms(ui) || chk_restored_scms(peer)) return 0; - if (restore_unix_queue(fle->fe->fd, peer)) + if (restore_unix_queue(fle->fe->fd, ui->ue->opts, peer)) return -1; - if (restore_unix_queue(fle_peer->fe->fd, ui)) + if (restore_unix_queue(fle_peer->fe->fd, peer->ue->opts, ui)) return -1; if (restore_sk_common(fle->fe->fd, ui)) From c6d3f25dab6be19885ba173ccd73bcaac16e14d2 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 29 Aug 2018 16:44:54 +0300 Subject: [PATCH 259/277] unix: image -- Move uflags into enum This values are part of abi, so must be set in image file. 
Signed-off-by: Cyrill Gorcunov Signed-off-by: Alexander Mikhalitsyn Signed-off-by: Alexander Mikhalitsyn --- criu/sk-unix.c | 31 +++++++++++++------------------ images/sk-unix.proto | 10 ++++++++++ 2 files changed, 23 insertions(+), 18 deletions(-) diff --git a/criu/sk-unix.c b/criu/sk-unix.c index 49a5df919a..382be9a0ad 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -54,11 +54,6 @@ * as "external" and require the --ext-unix-sk option. */ -#define USK_EXTERN (1 << 0) -#define USK_SERVICE (1 << 1) -#define USK_CALLBACK (1 << 2) -#define USK_INHERIT (1 << 3) - #define FAKE_INO 0 struct unix_sk_desc { @@ -393,7 +388,7 @@ static int dump_one_unix_fd(int lfd, uint32_t id, const struct fd_parms *p) if (unlikely(ue->peer == service_sk_ino)) { ue->state = TCP_CLOSE; ue->peer = 0; - ue->uflags |= USK_SERVICE; + ue->uflags |= UNIX_UFLAGS__SERVICE; } if (sk->namelen && *sk->name) { @@ -528,7 +523,7 @@ static int dump_one_unix_fd(int lfd, uint32_t id, const struct fd_parms *p) * Postpone writing the entry if a peer isn't found yet. * It's required, because we may need to modify the entry. * For example, if a socket is external and is dumped by - * a callback, the USK_CALLBACK flag must be set. + * a callback, the UNIX_UFLAGS__CALLBACK flag must be set. 
*/ if (list_empty(&sk->peer_node) && write_unix_entry(sk)) return -1; @@ -793,7 +788,7 @@ static int __dump_external_socket(struct unix_sk_desc *sk, return -1; if (ret == 0) { - sk->ue->uflags |= USK_CALLBACK; + sk->ue->uflags |= UNIX_UFLAGS__CALLBACK; return 0; } @@ -870,7 +865,7 @@ int fix_external_unix_sockets(void) e.state = TCP_LISTEN; e.name.data = (void *)sk->name; e.name.len = (size_t)sk->namelen; - e.uflags = USK_EXTERN; + e.uflags = UNIX_UFLAGS__EXTERN; e.peer = 0; e.fown = &fown; e.opts = &skopts; @@ -1269,7 +1264,7 @@ static int post_open_standalone(struct file_desc *d, int fd) ui = container_of(d, struct unix_sk_info, d); BUG_ON((ui->flags & (USK_PAIR_MASTER | USK_PAIR_SLAVE)) || - (ui->ue->uflags & (USK_CALLBACK | USK_INHERIT))); + (ui->ue->uflags & (UNIX_UFLAGS__CALLBACK | UNIX_UFLAGS__INHERIT))); if (chk_restored_scms(ui)) return 1; @@ -1337,7 +1332,7 @@ static int post_open_standalone(struct file_desc *d, int fd) restore_queue: if (peer->queuer == ui && - !(peer->ue->uflags & USK_EXTERN) && + !(peer->ue->uflags & UNIX_UFLAGS__EXTERN) && restore_unix_queue(fd, ui->ue->opts, peer)) return -1; restore_sk_common: @@ -1785,7 +1780,7 @@ static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd) return post_open_standalone(&ui->d, fle->fe->fd); /* Fake socket will be restored by its peer */ - if (!(ui->ue->uflags & USK_EXTERN) && ui->ue->ino == FAKE_INO) + if (!(ui->ue->uflags & UNIX_UFLAGS__EXTERN) && ui->ue->ino == FAKE_INO) return 1; if (set_netns(ui->ue->ns_id)) @@ -1796,7 +1791,7 @@ static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd) * If so, put response, that dumping and restoring * was successful. 
*/ - if (ui->ue->uflags & USK_SERVICE) { + if (ui->ue->uflags & UNIX_UFLAGS__SERVICE) { int sks[2]; if (socketpair(PF_UNIX, ui->ue->type, 0, sks)) { @@ -1866,7 +1861,7 @@ static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd) sk = sks[0]; } else { - if (ui->ue->uflags & USK_CALLBACK) { + if (ui->ue->uflags & UNIX_UFLAGS__CALLBACK) { sk = run_plugins(RESTORE_UNIX_SK, ui->ue->ino); if (sk >= 0) goto out; @@ -1876,7 +1871,7 @@ static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd) * Connect to external sockets requires * special option to be passed. */ - if (ui->peer && (ui->peer->ue->uflags & USK_EXTERN) && + if (ui->peer && (ui->peer->ue->uflags & UNIX_UFLAGS__EXTERN) && !(opts.ext_unix_sk)) { pr_err("External socket found in image. " "Consider using the --" USK_EXT_PARAM @@ -1935,7 +1930,7 @@ static int open_unix_sk(struct file_desc *d, int *new_fd) ui = container_of(d, struct unix_sk_info, d); if (inherited_fd(d, new_fd)) { - ui->ue->uflags |= USK_INHERIT; + ui->ue->uflags |= UNIX_UFLAGS__INHERIT; ret = *new_fd >= 0 ? 
0 : -1; } else if (ui->flags & USK_PAIR_MASTER) ret = open_unixsk_pair_master(ui, new_fd); @@ -1976,7 +1971,7 @@ static int unlink_sk(struct unix_sk_info *ui) { int ret = 0, cwd_fd = -1, root_fd = -1, ns_fd = -1; - if (!ui->name || ui->name[0] == '\0' || (ui->ue->uflags & USK_EXTERN)) + if (!ui->name || ui->name[0] == '\0' || (ui->ue->uflags & UNIX_UFLAGS__EXTERN)) return 0; if (prep_unix_sk_cwd(ui, &cwd_fd, &root_fd, NULL)) @@ -2209,7 +2204,7 @@ int add_fake_unix_queuers(void) struct unix_sk_info *ui; list_for_each_entry(ui, &unix_sockets, list) { - if ((ui->ue->uflags & (USK_EXTERN | USK_CALLBACK)) || ui->queuer) + if ((ui->ue->uflags & (UNIX_UFLAGS__EXTERN | UNIX_UFLAGS__CALLBACK)) || ui->queuer) continue; if (!(ui->ue->state == TCP_ESTABLISHED && !ui->peer) && ui->ue->type != SOCK_DGRAM) diff --git a/images/sk-unix.proto b/images/sk-unix.proto index c59644f6ea..fa9121fb3f 100644 --- a/images/sk-unix.proto +++ b/images/sk-unix.proto @@ -10,6 +10,16 @@ message file_perms_entry { required uint32 gid = 3; } +/* + * Bitmask for unix_sk_entry::uflags + */ +enum unix_uflags { + EXTERN = 1; + SERVICE = 2; + CALLBACK = 4; + INHERIT = 8; +} + message unix_sk_entry { /* * Few words about why we need both -- id and ino. From 1b85265d601996765176595aaa248540c2c9ed5d Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 9 Jun 2018 16:26:06 +0300 Subject: [PATCH 260/277] unix: Collect bindmounted unix sockets Mount points might be bindmounted to some resources (say, bound unix sockets), thus when the time comes to do the real bind mount call we need to prepare the appropriate resource first. On dump we walk over all bind-mounts and check whether the mountpoint is a unix socket, then save the mnt_id into the image. To distinguish such sockets from others we use the UNIX_UFLAGS__BINDMOUNT flag. Note that at the moment we support only DGRAM closed sockets.
Signed-off-by: Cyrill Gorcunov --- criu/cr-dump.c | 3 ++ criu/include/sockets.h | 1 + criu/sk-unix.c | 91 +++++++++++++++++++++++++++++++++++++++--- images/sk-unix.proto | 1 + 4 files changed, 91 insertions(+), 5 deletions(-) diff --git a/criu/cr-dump.c b/criu/cr-dump.c index a8188724f2..7b46e663b5 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -1876,6 +1876,9 @@ int cr_dump_tasks(pid_t pid) if (collect_namespaces(true) < 0) goto err; + if (collect_unix_bindmounts() < 0) + goto err; + glob_imgset = cr_glob_imgset_open(O_DUMP); if (!glob_imgset) goto err; diff --git a/criu/include/sockets.h b/criu/include/sockets.h index 2117ab3e83..f46a252e54 100644 --- a/criu/include/sockets.h +++ b/criu/include/sockets.h @@ -43,6 +43,7 @@ extern int add_fake_unix_queuers(void); extern int fix_external_unix_sockets(void); extern int prepare_scms(void); extern int unix_note_scm_rights(int id_for, uint32_t *file_ids, int *fds, int n_ids); +extern int collect_unix_bindmounts(void); extern struct collect_image_info netlink_sk_cinfo; diff --git a/criu/sk-unix.c b/criu/sk-unix.c index 382be9a0ad..f79238f982 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -74,6 +74,9 @@ struct unix_sk_desc { unsigned char shutdown; bool deleted; + bool bindmount; + unsigned int mnt_id; + mode_t mode; uid_t uid; gid_t gid; @@ -381,6 +384,9 @@ static int dump_one_unix_fd(int lfd, uint32_t id, const struct fd_parms *p) if (unix_resolve_name(lfd, id, sk, ue, p)) goto err; + if (sk->bindmount) + ue->uflags |= UNIX_UFLAGS__BINDMOUNT; + /* * Check if this socket is connected to criu service. * Dump it like closed one and mark it for restore. 
@@ -568,11 +574,16 @@ static int unix_resolve_name(int lfd, uint32_t id, struct unix_sk_desc *d, if (d->namelen == 0 || name[0] == '\0') return 0; - if (kdat.sk_unix_file && (root_ns_mask & CLONE_NEWNS)) { - if (get_mnt_id(lfd, &mnt_id)) - return -1; - ue->mnt_id = mnt_id; - ue->has_mnt_id = mnt_id; + if (!d->bindmount) { + if (kdat.sk_unix_file && (root_ns_mask & CLONE_NEWNS)) { + if (get_mnt_id(lfd, &mnt_id)) + return -1; + ue->mnt_id = mnt_id; + ue->has_mnt_id = mnt_id; + } + } else { + ue->mnt_id = d->mnt_id; + ue->has_mnt_id = true; } if (ue->mnt_id >= 0) @@ -693,6 +704,7 @@ static int unix_collect_one(const struct unix_diag_msg *m, INIT_LIST_HEAD(&d->peer_list); INIT_LIST_HEAD(&d->peer_node); d->fd = -1; + d->mnt_id = -1; if (tb[UNIX_DIAG_SHUTDOWN]) d->shutdown = nla_get_u8(tb[UNIX_DIAG_SHUTDOWN]); @@ -888,6 +900,75 @@ int fix_external_unix_sockets(void) return -1; } +int collect_unix_bindmounts(void) +{ + struct mount_info *mi; + struct stat st = {}; + int ns_old = -1; + int ret = 0; + + pr_debug("Collecting unix bindmounts\n"); + + for (mi = mntinfo; mi; mi = mi->next) { + if (list_empty(&mi->mnt_bind)) + continue; + + if (switch_ns(mi->nsid->ns_pid, &mnt_ns_desc, &ns_old) < 0) { + pr_err("Can't switch ns to mnt_id %d", mi->mnt_id); + if (restore_ns(ns_old, &mnt_ns_desc)) { + pr_err("Can't switch mount ns back from mnt_id %d\n", mi->mnt_id); + return -1; + } + return -1; + } + + if (stat(mi->mountpoint, &st)) { + pr_warn("Can't stat on %s: %m\n", mi->mountpoint); + if (restore_ns(ns_old, &mnt_ns_desc)) { + pr_err("Can't switch mount ns back from mnt_id %d\n", mi->mnt_id); + return -1; + } + continue; + } + + if (S_ISSOCK(st.st_mode)) { + struct unix_sk_desc *sk; + + list_for_each_entry(sk, &unix_sockets, list) { + if (sk->vfs_ino == (int)st.st_ino && + sk->vfs_dev == (int)st.st_dev) { + pr_debug("Found sock s_dev %#x ino %d bindmounted mnt_id %d %s\n", + (int)st.st_dev, (int)st.st_ino, mi->mnt_id, mi->mountpoint); + if (sk->bindmount) { + pr_err("Many 
bindings for sockets are not yet supported %d at %s\n", + (int)st.st_ino, mi->mountpoint); + ret = -1; + } else { + sk->mnt_id = mi->mnt_id; + sk->bindmount = true; + } + if (sk->type != SOCK_DGRAM && sk->state != TCP_CLOSE) { + pr_err("Unsupported bindmounted socket ino %d at %s\n", + (int)st.st_ino, mi->mountpoint); + ret = -1; + } + break; + } + } + } + + if (restore_ns(ns_old, &mnt_ns_desc)) { + pr_err("Can't switch mount ns back from %d\n", mi->nsid->ns_pid); + return -1; + } + + if (ret) + break; + } + + return ret; +} + struct unix_sk_info { UnixSkEntry *ue; struct list_head list; diff --git a/images/sk-unix.proto b/images/sk-unix.proto index fa9121fb3f..c24ca92fc8 100644 --- a/images/sk-unix.proto +++ b/images/sk-unix.proto @@ -18,6 +18,7 @@ enum unix_uflags { SERVICE = 2; CALLBACK = 4; INHERIT = 8; + BINDMOUNT = 16; } message unix_sk_entry { From 9d83f947269722fa2f723d948b75d02be422b660 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 9 Jun 2018 16:26:07 +0300 Subject: [PATCH 261/277] unix: Collect images before opening mountpoints Because we need to gather unix sockets earlier than we start creating mount tree. Thus we will be able to handle bindmounted sockets. 
Signed-off-by: Cyrill Gorcunov --- criu/cr-restore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index e5a827753a..afef3c1a9e 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -273,7 +273,6 @@ static struct collect_image_info *cinfos[] = { }; static struct collect_image_info *cinfos_files[] = { - &unix_sk_cinfo, &fifo_cinfo, &pipe_cinfo, &nsfile_cinfo, @@ -297,6 +296,7 @@ static struct collect_image_info *cinfos_files[] = { static struct collect_image_info *before_ns_cinfos[] = { &tty_info_cinfo, /* Restore devpts content */ &tty_cdata, + &unix_sk_cinfo, }; static struct pprep_head *post_prepare_heads = NULL; From b2b2bfbd6b9e25dc800587ccb4bf38f6b263ec66 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 9 Jun 2018 16:26:08 +0300 Subject: [PATCH 262/277] unix: Collect bindmounted unix sockets into own list So when we examine mount points and sockets we won't waste time on non-bindmounted ones. Signed-off-by: Cyrill Gorcunov --- criu/sk-unix.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/criu/sk-unix.c b/criu/sk-unix.c index f79238f982..764480aea6 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -98,6 +98,7 @@ static mutex_t *mutex_ghost; static LIST_HEAD(unix_sockets); static LIST_HEAD(unix_ghost_addr); +static LIST_HEAD(unix_mnt_sockets); static int unix_resolve_name(int lfd, uint32_t id, struct unix_sk_desc *d, UnixSkEntry *ue, const struct fd_parms *p); @@ -972,6 +973,7 @@ int collect_unix_bindmounts(void) struct unix_sk_info { UnixSkEntry *ue; struct list_head list; + struct list_head mnt_list; char *name; char *name_dir; unsigned flags; @@ -2119,6 +2121,7 @@ static int init_unix_sk_info(struct unix_sk_info *ui, UnixSkEntry *ue) memzero(&ui->d, sizeof(ui->d)); INIT_LIST_HEAD(&ui->list); + INIT_LIST_HEAD(&ui->mnt_list); INIT_LIST_HEAD(&ui->connected); INIT_LIST_HEAD(&ui->node); INIT_LIST_HEAD(&ui->scm_fles); @@ -2214,6 +2217,9 @@ static int collect_one_unixsk(void *o,
ProtobufCMessage *base, struct cr_img *i) list_add_tail(&ui->ghost_node, &unix_ghost_addr); } + if (ui->ue->uflags & UNIX_UFLAGS__BINDMOUNT) + list_add_tail(&ui->mnt_list, &unix_mnt_sockets); + list_add_tail(&ui->list, &unix_sockets); return file_desc_add(&ui->d, ui->ue->id, &unix_desc_ops); } From 5dda3014ca2d91b1b180a503a78de3a6ecad8146 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 9 Jun 2018 16:26:09 +0300 Subject: [PATCH 263/277] unix: Move shared data init into early stage We will need to take mutex when bind() bindmounted sockets. Strictly speaking we won't support bindmounted and deleted sockets for now but better prepare this scaffolds early. Signed-off-by: Cyrill Gorcunov --- criu/cr-restore.c | 3 +++ criu/include/sockets.h | 1 + criu/sk-unix.c | 10 +++++++--- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index afef3c1a9e..c79fe94779 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -253,6 +253,9 @@ static int crtools_prepare_shared(void) if (prepare_cgroup()) return -1; + if (unix_prepare_shared()) + return -1; + return 0; } diff --git a/criu/include/sockets.h b/criu/include/sockets.h index f46a252e54..038bbd5e6c 100644 --- a/criu/include/sockets.h +++ b/criu/include/sockets.h @@ -63,6 +63,7 @@ extern int netlink_receive_one(struct nlmsghdr *hdr, struct ns_id *ns, void *arg extern int unix_sk_id_add(unsigned int ino); extern int unix_sk_ids_parse(char *optarg); extern int unix_prepare_root_shared(void); +extern int unix_prepare_shared(void); extern int do_dump_opt(int sk, int level, int name, void *val, int len); #define dump_opt(s, l, n, f) do_dump_opt(s, l, n, f, sizeof(*f)) diff --git a/criu/sk-unix.c b/criu/sk-unix.c index 764480aea6..042b3866d8 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -2130,16 +2130,20 @@ static int init_unix_sk_info(struct unix_sk_info *ui, UnixSkEntry *ue) return 0; } -int unix_prepare_root_shared(void) +int unix_prepare_shared(void) { - struct 
unix_sk_info *ui; - mutex_ghost = shmalloc(sizeof(*mutex_ghost)); if (!mutex_ghost) { pr_err("ghost: Can't allocate mutex\n"); return -ENOMEM; } mutex_init(mutex_ghost); + return 0; +} + +int unix_prepare_root_shared(void) +{ + struct unix_sk_info *ui; pr_debug("ghost: Resolving addresses\n"); From b2ee9fab0739580f448ffad700d46a111b74e8d9 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 29 Aug 2018 17:09:29 +0300 Subject: [PATCH 264/277] unix: Add support for bindmounted sockets Some unix sockets might be bindmounted (say /dev/log bound to another place). So to handle it we need to change the logic we open such sockets especially because we create mount tree earlier than we start to restore files. Thus here what we do: - on dump mark such sockets with UNIX_UFLAGS__BINDMOUNT flag so we would distinguish them on restore; - collect unix sockets before creating mount tree; note that at this moment we able to simply gather this sockets into own @unix_mnt_sockets list and nothing more because setting up the peers and such happens later in that named post action procedures; - when we need to create a bindmount point we enter into unix engine and figure out if there a socket to bindmount over; if found we pre-allocate the socketpair, bind it and save inside fdstore engine; using socketpair is important because later we need both peers to restore queued data; - finally when we start restoring files we simply fetch the socket from the fdstore and use it directly. All this scheme is working simply because we support dgram standalone sockets only, adding support for streamed sockets requires a way more engine rework and hopefully we won't need it in near future. 
Signed-off-by: Cyrill Gorcunov --- criu/include/sockets.h | 2 + criu/mount.c | 8 ++ criu/sk-unix.c | 277 ++++++++++++++++++++++++++++++++--------- 3 files changed, 229 insertions(+), 58 deletions(-) diff --git a/criu/include/sockets.h b/criu/include/sockets.h index 038bbd5e6c..dcb1dd9e46 100644 --- a/criu/include/sockets.h +++ b/criu/include/sockets.h @@ -10,6 +10,7 @@ struct fdinfo_list_entry; struct sk_opts_entry; +struct mount_info; struct file_desc; struct fd_parms; struct cr_imgset; @@ -44,6 +45,7 @@ extern int fix_external_unix_sockets(void); extern int prepare_scms(void); extern int unix_note_scm_rights(int id_for, uint32_t *file_ids, int *fds, int n_ids); extern int collect_unix_bindmounts(void); +extern int unix_prepare_bindmount(struct mount_info *mi); extern struct collect_image_info netlink_sk_cinfo; diff --git a/criu/mount.c b/criu/mount.c index 180f2a62dc..46395b7c8b 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -28,6 +28,7 @@ #include "clone-noasan.h" #include "fdstore.h" +#include "sockets.h" #include "images/mnt.pb-c.h" /* @@ -2615,6 +2616,13 @@ static int try_remap_mount(struct mount_info *m) struct mnt_remap_entry *r; if (!mnt_needs_remap(m)) + + if (unix_prepare_bindmount(mi)) { + pr_err("Failed to prepare bindmount on unix at %s\n", + mi->mountpoint); + goto err; + } + return 0; BUG_ON(!m->parent); diff --git a/criu/sk-unix.c b/criu/sk-unix.c index 042b3866d8..f0661e9b52 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -977,7 +977,10 @@ struct unix_sk_info { char *name; char *name_dir; unsigned flags; - int fdstore_id; + union { + int fdstore_id; + int fdstore_mnt_id[2]; + }; struct unix_sk_info *peer; struct pprep_head peer_resolve; /* XXX : union with the above? 
*/ struct file_desc d; @@ -1011,6 +1014,8 @@ struct scm_fle { #define USK_PAIR_MASTER 0x1 #define USK_PAIR_SLAVE 0x2 #define USK_GHOST_FDSTORE 0x4 /* bound but removed address */ +#define USK_BINDMOUNT 0x8 /* socket is pre-openeded for bindmount reason */ +#define USK_NOCWD 0x10 /* no cwd switch */ static struct unix_sk_info *find_unix_sk_by_ino(int ino) { @@ -1233,6 +1238,9 @@ static int revert_unix_sk_cwd(struct unix_sk_info *ui, int *prev_cwd_fd, int *ro { int ret = 0; + if (ui->flags & USK_NOCWD) + return 0; + if (*ns_fd >= 0 && restore_ns(*ns_fd, &mnt_ns_desc)) ret = -1; if (*root_fd >= 0) { @@ -1260,6 +1268,9 @@ static int prep_unix_sk_cwd(struct unix_sk_info *ui, int *prev_cwd_fd, static struct ns_id *root = NULL, *ns; int fd; + if (ui->flags & USK_NOCWD) + return 0; + if (prev_mntns_fd && ui->name[0] && ui->ue->mnt_id >= 0) { struct ns_id *mntns = lookup_nsid_by_mnt_id(ui->ue->mnt_id); int ns_fd; @@ -1835,12 +1846,68 @@ static int setup_second_end(int *sks, struct fdinfo_list_entry *second_end) return 0; } +static int break_connected(struct unix_sk_info *ui, int sk) +{ + if (ui->ue->type == SOCK_DGRAM) { + struct sockaddr_un addr = { .sun_family = AF_UNSPEC }; + /* + * socketpair() assigns sks[1] as a peer of sks[0] + * (and vice versa). But in this case (not zero peer) + * it's impossible for other sockets to connect + * to sks[0] (see unix_dgram_connect()->unix_may_send()). + * The below is hack: we use that connect with AF_UNSPEC + * clears socket's peer. + * Note, that connect hack flushes receive queue, + * so restore_unix_queue() must be after it. 
+ */ + if (connect(sk, (struct sockaddr *)&addr, sizeof(addr.sun_family))) { + pr_perror("Can't clear socket id %#x peer", ui->ue->id); + return -1; + } + } + return 0; +} + +static int make_socket(struct unix_sk_info *ui, int sks[2], bool pair, bool disjoin_master) +{ + if (unlikely(ui->flags & USK_BINDMOUNT)) { + sks[0] = fdstore_get(ui->fdstore_mnt_id[0]); + sks[1] = fdstore_get(ui->fdstore_mnt_id[1]); + if (sks[0] < 0 || sks[1] < 0) { + pr_err("bindmount: Can't fetch id %#x socketpair from the store\n", + ui->ue->id); + return -1; + } + } else { + int ret; + + sks[0] = sks[1] = -1; + if (!pair) { + ret = socket(PF_UNIX, ui->ue->type, 0); + sks[0] = ret; + } else + ret = socketpair(PF_UNIX, ui->ue->type, 0, sks); + + if (ret < 0) { + pr_perror("Can't create %s id %#x\n", + pair ? "socketpair" : "socket", + ui->ue->id); + return -1; + } + } + + if (disjoin_master && pair) + return break_connected(ui, sks[0]); + + return 0; +} + static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd) { struct unix_sk_info *queuer = ui->queuer; struct unix_sk_info *peer = ui->peer; struct fdinfo_list_entry *fle, *fle_peer; - int sk; + int sks[2]; fle = file_master(&ui->d); pr_info_opening("standalone", ui, fle); @@ -1875,21 +1942,14 @@ static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd) * was successful. 
*/ if (ui->ue->uflags & UNIX_UFLAGS__SERVICE) { - int sks[2]; - - if (socketpair(PF_UNIX, ui->ue->type, 0, sks)) { - pr_perror("Can't create socketpair"); + if (make_socket(ui, sks, true, false)) return -1; - } if (send_criu_dump_resp(sks[1], true, true) == -1) return -1; close(sks[1]); - sk = sks[0]; } else if (ui->ue->state == TCP_ESTABLISHED && queuer && queuer->ue->ino == FAKE_INO) { - int ret, sks[2]; - if (ui->ue->type != SOCK_STREAM) { pr_err("Non-stream socket %d in established state\n", ui->ue->ino); @@ -1902,51 +1962,21 @@ static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd) return -1; } - ret = socketpair(PF_UNIX, ui->ue->type, 0, sks); - if (ret < 0) { - pr_perror("Can't create socketpair"); + if (make_socket(ui, sks, true, false)) return -1; - } if (setup_second_end(sks, file_master(&queuer->d))) return -1; - - sk = sks[0]; } else if (ui->ue->type == SOCK_DGRAM && queuer && queuer->ue->ino == FAKE_INO) { - struct sockaddr_un addr; - int sks[2]; - - if (socketpair(PF_UNIX, ui->ue->type, 0, sks) < 0) { - pr_perror("Can't create socketpair"); + if (make_socket(ui, sks, true, true)) return -1; - } - - sk = sks[0]; - addr.sun_family = AF_UNSPEC; - - /* - * socketpair() assigns sks[1] as a peer of sks[0] - * (and vice versa). But in this case (not zero peer) - * it's impossible for other sockets to connect - * to sks[0] (see unix_dgram_connect()->unix_may_send()). - * The below is hack: we use that connect with AF_UNSPEC - * clears socket's peer. - * Note, that connect hack flushes receive queue, - * so restore_unix_queue() must be after it. 
- */ - if (connect(sk, (struct sockaddr *)&addr, sizeof(addr.sun_family))) { - pr_perror("Can't clear socket's peer"); - return -1; - } if (setup_second_end(sks, file_master(&queuer->d))) return -1; - - sk = sks[0]; } else { if (ui->ue->uflags & UNIX_UFLAGS__CALLBACK) { - sk = run_plugins(RESTORE_UNIX_SK, ui->ue->ino); - if (sk >= 0) + sks[0] = run_plugins(RESTORE_UNIX_SK, ui->ue->ino); + if (sks[0] >= 0) goto out; } @@ -1962,23 +1992,24 @@ static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd) return -1; } - sk = socket(PF_UNIX, ui->ue->type, 0); - if (sk < 0) { - pr_perror("Can't make unix socket"); + pr_debug("socketpair instead of plain socket\n"); + if (make_socket(ui, sks, false, true)) return -1; - } + close(sks[1]); } - if (bind_unix_sk(sk, ui)) { - close(sk); - return -1; + if (!(ui->ue->uflags & UNIX_UFLAGS__BINDMOUNT)) { + if (bind_unix_sk(sks[0], ui)) { + close(sks[0]); + return -1; + } } if (ui->ue->state == TCP_LISTEN) { pr_info("\tPutting %d into listen state\n", ui->ue->ino); - if (listen(sk, ui->ue->backlog) < 0) { + if (listen(sks[0], ui->ue->backlog) < 0) { pr_perror("Can't make usk listen"); - close(sk); + close(sks[0]); return -1; } ui->listen = 1; @@ -1993,15 +2024,15 @@ static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd) * 2)Queuer won't be able to connect, if we do * shutdown, so postpone it. 
*/ - *new_fd = sk; + *new_fd = sks[0]; return 1; } out: - if (restore_sk_common(sk, ui)) + if (restore_sk_common(sks[0], ui)) return -1; - *new_fd = sk; + *new_fd = sks[0]; return 0; } @@ -2108,7 +2139,8 @@ static int init_unix_sk_info(struct unix_sk_info *ui, UnixSkEntry *ue) ui->name_dir = (void *)ue->name_dir; ui->flags = 0; - ui->fdstore_id = -1; + ui->fdstore_mnt_id[0] = -1; /* fdstore_id in union */ + ui->fdstore_mnt_id[1] = -1; ui->ghost_dir_pos = 0; ui->peer = NULL; ui->queuer = NULL; @@ -2221,8 +2253,24 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i) list_add_tail(&ui->ghost_node, &unix_ghost_addr); } - if (ui->ue->uflags & UNIX_UFLAGS__BINDMOUNT) + if (ui->ue->uflags & UNIX_UFLAGS__BINDMOUNT) { + /* + * Make sure it is supported socket! + */ + if ((ui->ue->uflags & ~UNIX_UFLAGS__BINDMOUNT) || + (ui->ue->type != SOCK_DGRAM) || + (ui->ue->state != TCP_CLOSE)) { + pr_err("bindmount: Unsupported socket id %#x " + "(expect %x:%s:%s got %x:%s:%s)\n", + ui->ue->id, UNIX_UFLAGS__BINDMOUNT, + ___socket_type_name(SOCK_DGRAM), + ___tcp_state_name(TCP_CLOSE), + ui->ue->uflags, ___socket_type_name(ui->ue->type), + ___tcp_state_name(ui->ue->state)); + return -1; + } list_add_tail(&ui->mnt_list, &unix_mnt_sockets); + } list_add_tail(&ui->list, &unix_sockets); return file_desc_add(&ui->d, ui->ue->id, &unix_desc_ops); @@ -2236,6 +2284,119 @@ struct collect_image_info unix_sk_cinfo = { .flags = COLLECT_SHARED, }; +int unix_prepare_bindmount(struct mount_info *mi) +{ + int prev_cwd_fd = -1, prev_root_fd = -1; + int ret = -1, sks[2] = { -1, -1 }; + struct unix_sk_info *ui; + char path[PATH_MAX]; + + list_for_each_entry(ui, &unix_mnt_sockets, mnt_list) { + if (ui->ue->mnt_id == mi->mnt_id) { + char type_name[64], state_name[64]; + pr_info("bindmount: id %#x ino %d type %s state %s queuer %p peer %d (name %.*s dir %s)\n", + ui->ue->id, ui->ue->ino, + __socket_type_name(ui->ue->type, type_name), + __tcp_state_name(ui->ue->state, state_name), 
+ ui->queuer, ui->ue->peer, + (int)ui->ue->name.len, ui->ue->name.data, + ui->name_dir ? ui->name_dir : "-"); + break; + } + } + + if (&ui->mnt_list == &unix_mnt_sockets) + return 0; + + /* + * Mark it as bindmount so when need to use we + * would fetch it from the fdstore, and point + * out that no need to cwd change since we + * already opened it in proper place. + */ + ui->flags |= USK_BINDMOUNT | USK_NOCWD; + + if (rst_get_mnt_root(mi->mnt_id, path, sizeof(path)) < 0) { + pr_err("bindmount: Can't setup mnt_root for %s\n", mi->ns_mountpoint); + return -1; + } + + prev_cwd_fd = open(".", O_RDONLY); + if (prev_cwd_fd < 0) { + pr_perror("bindmount: Can't save current cwd"); + goto out; + } + + prev_root_fd = open("/", O_RDONLY); + if (prev_root_fd < 0) { + pr_perror("bindmount: Can't save current root"); + goto out; + } + + if (chdir(path)) { + pr_perror("bindmount: Can't chdir to %s", path); + goto out; + } else if (chroot(".")) { + pr_perror("bindmount: Can't chroot"); + goto out; + } + + if (ui->name_dir && chdir(ui->name_dir)) { + pr_perror("bindmount: Can't chdir to %s", ui->name_dir); + goto out; + } + + if (set_netns(ui->ue->ns_id)) + return -1; + + /* + * We support only DGRAM sockets for now so it is safe + * to preallocate socket pair here and later the + * open_unixsk_standalone helper will simply fetch the + * peers, closing the ends it doesn't need. 
+ */ + if (socketpair(PF_UNIX, ui->ue->type, 0, sks)) { + pr_perror("bindmount: Can't create socketpair id %#x", + ui->ue->id); + goto out; + } + + if (bind_unix_sk(sks[0], ui)) + goto out; + + ui->fdstore_mnt_id[0] = fdstore_add(sks[0]); + ui->fdstore_mnt_id[1] = fdstore_add(sks[1]); + if (ui->fdstore_mnt_id[0] < 0 || ui->fdstore_mnt_id[1] < 0) { + pr_err("bindmount: Can't add socketpair id %#x into fdstore\n", + ui->ue->id); + goto out; + } + + if (fchdir(prev_root_fd)) { + pr_perror("bindmount: Can't revert root directory"); + goto out; + } else if (chroot(".")) { + pr_perror("bindmount: Can't revert chroot "); + goto out; + } else if (fchdir(prev_cwd_fd)) { + pr_perror("bindmount: Can't revert working dir"); + goto out; + } + + ret = 0; +out: + close_safe(&prev_cwd_fd); + close_safe(&prev_root_fd); + close_safe(&sks[0]); + close_safe(&sks[1]); + + if (ret == 0) + pr_debug("bindmount: Standalone socket moved into fdstore (id %#x ino %d peer %d)\n", + ui->ue->id, ui->ue->ino, ui->ue->peer); + + return ret; +} + static void set_peer(struct unix_sk_info *ui, struct unix_sk_info *peer) { ui->peer = peer; From 3ec1c5a047a2bc7006e196d710fdf0449b530d7c Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 29 Aug 2018 17:10:20 +0300 Subject: [PATCH 265/277] unix: test -- Add bind-mount-unix test case To test a case where unix socket is bind mounted to somewhere so restore may fail if socket has not been created. 
Signed-off-by: Cyrill Gorcunov --- test/zdtm/static/Makefile | 1 + test/zdtm/static/bind-mount-unix.c | 139 ++++++++++++++++++++++++++ test/zdtm/static/bind-mount-unix.desc | 1 + 3 files changed, 141 insertions(+) create mode 100644 test/zdtm/static/bind-mount-unix.c create mode 100644 test/zdtm/static/bind-mount-unix.desc diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index a8e4107d3b..e3ee397d7b 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -359,6 +359,7 @@ TST_DIR = \ del_standalone_un \ sk-unix-mntns \ sk-unix01 \ + bind-mount-unix \ unsupported_children_collision \ shared_slave_mount_children \ non_uniform_share_propagation \ diff --git a/test/zdtm/static/bind-mount-unix.c b/test/zdtm/static/bind-mount-unix.c new file mode 100644 index 0000000000..2981c408a1 --- /dev/null +++ b/test/zdtm/static/bind-mount-unix.c @@ -0,0 +1,139 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "Check bind-mounts with unix socket"; +const char *test_author = "Cyrill Gorcunov "; + +char *dirname; +TEST_OPTION(dirname, string, "directory name", 1); + +int main(int argc, char **argv) +{ + char path_unix[PATH_MAX], path_bind[PATH_MAX]; + char unix_name[] = "criu-log"; + char bind_name[] = "criu-bind-log"; + int sk = -1, skc = -1, ret = 1, fd; + struct sockaddr_un addr; + unsigned int addrlen; + struct stat st; + + char buf[] = "123456"; + char rbuf[sizeof(buf)]; + + test_init(argc, argv); + + mkdir(dirname, 0700); + if (mount("none", dirname, "tmpfs", 0, NULL)) { + pr_perror("Unable to mount %s", dirname); + return 1; + } + + ssprintf(path_bind, "%s/%s", dirname, bind_name); + ssprintf(path_unix, "%s/%s", dirname, unix_name); + + unlink(path_bind); + unlink(path_unix); + + fd = open(path_bind, O_RDONLY | O_CREAT); + if (fd < 0) { + pr_perror("Can't open %s", path_bind); + goto err; + } + close(fd); + + addr.sun_family = AF_UNIX; + 
sstrncpy(addr.sun_path, path_unix); + addrlen = sizeof(addr.sun_family) + strlen(path_unix); + + sk = socket(AF_UNIX, SOCK_DGRAM, 0); + if (sk < 0) { + pr_perror("Can't create socket %s", path_unix); + goto err; + } + + ret = bind(sk, (struct sockaddr *)&addr, addrlen); + if (ret) { + pr_perror("Can't bind socket %s", path_unix); + goto err; + } + + if (stat(path_unix, &st) == 0) { + test_msg("path %s st.st_ino %#lx st.st_mode 0%o (sock %d)\n", + path_unix, (unsigned long)st.st_ino, + (int)st.st_mode, !!S_ISSOCK(st.st_mode)); + } else + pr_perror("Can't stat on %s", path_unix); + + if (mount(path_unix, path_bind, NULL, MS_BIND | MS_REC, NULL)) { + pr_perror("Unable to bindmount %s -> %s", path_unix, path_bind); + goto err; + } + + if (stat(path_unix, &st) == 0) { + test_msg("path %s st.st_dev %#x st.st_rdev %#x st.st_ino %#lx st.st_mode 0%o (sock %d)\n", + path_unix, (int)st.st_dev, (int)st.st_rdev, (unsigned long)st.st_ino, + (int)st.st_mode, !!S_ISSOCK(st.st_mode)); + } else + pr_perror("Can't stat on %s", path_unix); + + if (stat(path_bind, &st) == 0) { + test_msg("path %s st.st_dev %#x st.st_rdev %#x st.st_ino %#lx st.st_mode 0%o (sock %d)\n", + path_bind, (int)st.st_dev, (int)st.st_rdev, (unsigned long)st.st_ino, + (int)st.st_mode, !!S_ISSOCK(st.st_mode)); + } else + pr_perror("Can't stat on %s", path_bind); + + test_daemon(); + test_waitsig(); + + skc = socket(AF_UNIX, SOCK_DGRAM, 0); + if (skc < 0) { + pr_perror("Can't create client socket"); + goto err; + } + + addr.sun_family = AF_UNIX; + sstrncpy(addr.sun_path, path_bind); + addrlen = sizeof(addr.sun_family) + strlen(path_bind); + + ret = sendto(skc, buf, sizeof(buf), 0, (struct sockaddr *)&addr, addrlen); + if (ret != (int)sizeof(buf)) { + pr_perror("Can't send data on client"); + ret = 1; + goto err; + } + + ret = read(sk, rbuf, sizeof(rbuf)); + if (ret < 0) { + pr_perror("Can't read data"); + ret = 1; + goto err; + } + + if (ret != sizeof(buf) || memcmp(buf, rbuf, sizeof(buf))) { + pr_err("Data 
mismatch"); + ret = 1; + goto err; + } + + pass(); + ret = 0; + +err: + umount2(path_bind, MNT_DETACH); + umount2(dirname, MNT_DETACH); + unlink(path_bind); + unlink(path_unix); + close(sk); + return ret; +} diff --git a/test/zdtm/static/bind-mount-unix.desc b/test/zdtm/static/bind-mount-unix.desc new file mode 100644 index 0000000000..a8849e0970 --- /dev/null +++ b/test/zdtm/static/bind-mount-unix.desc @@ -0,0 +1 @@ +{'flavor': 'ns uns', 'flags': 'suid', 'feature': 'mnt_id'} From 6c9ee70ab7ea2eaa2b1301951e9d307fa547931e Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 29 Aug 2018 20:02:08 +0300 Subject: [PATCH 266/277] unix: bindmount -- Move mounting code to be called before first mount Otherwise the mount call may fail since bindmounted unix socket wont be created. Signed-off-by: Cyrill Gorcunov --- criu/mount.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/criu/mount.c b/criu/mount.c index 46395b7c8b..208a492c10 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -2235,6 +2235,12 @@ static int do_bind_mount(struct mount_info *mi) mnt_path = mnt_fd_path; } + if (unix_prepare_bindmount(mi)) { + pr_err("Failed to prepare bindmount on unix at %s\n", + mi->mountpoint); + goto err; + } + if (cut_root[0] == 0) /* This case is handled by mi->bind->fd */ goto skip_overmount_check; @@ -2617,12 +2623,6 @@ static int try_remap_mount(struct mount_info *m) if (!mnt_needs_remap(m)) - if (unix_prepare_bindmount(mi)) { - pr_err("Failed to prepare bindmount on unix at %s\n", - mi->mountpoint); - goto err; - } - return 0; BUG_ON(!m->parent); From 271a6cb11e2fb0ba2ceab424a23b63072eeaad7a Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 9 Jun 2018 16:26:03 +0300 Subject: [PATCH 267/277] zdtm: Add sstrncpy helper To elimitane compilation warnings with gcc-8. 
Signed-off-by: Cyrill Gorcunov --- test/zdtm/lib/zdtmtst.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/zdtm/lib/zdtmtst.h b/test/zdtm/lib/zdtmtst.h index 6eec266475..bf9e21bf40 100644 --- a/test/zdtm/lib/zdtmtst.h +++ b/test/zdtm/lib/zdtmtst.h @@ -174,4 +174,9 @@ extern pid_t sys_clone_unified(unsigned long flags, void *child_stack, void *par ___ret; \ }) +#define sstrncpy(d, s) ({ \ + strncpy(d, s, sizeof(d)-1); \ + d[sizeof(d)-1] = '\0'; \ +}) + #endif /* _VIMITESU_H_ */ From 803ffb0bfc1731c69a6b3bd051e29756c6487426 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 5 Sep 2018 12:29:24 +0300 Subject: [PATCH 268/277] unix: bindmount -- Show details about queuer Signed-off-by: Cyrill Gorcunov --- criu/sk-unix.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/criu/sk-unix.c b/criu/sk-unix.c index f0661e9b52..e02dda85d9 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -2294,13 +2294,14 @@ int unix_prepare_bindmount(struct mount_info *mi) list_for_each_entry(ui, &unix_mnt_sockets, mnt_list) { if (ui->ue->mnt_id == mi->mnt_id) { char type_name[64], state_name[64]; - pr_info("bindmount: id %#x ino %d type %s state %s queuer %p peer %d (name %.*s dir %s)\n", + pr_info("bindmount: id %#x ino %d type %s state %s (queuer id %#x ino %d) peer %d (name %.*s dir %s)\n", ui->ue->id, ui->ue->ino, __socket_type_name(ui->ue->type, type_name), __tcp_state_name(ui->ue->state, state_name), - ui->queuer, ui->ue->peer, - (int)ui->ue->name.len, ui->ue->name.data, - ui->name_dir ? ui->name_dir : "-"); + ui->queuer ? ui->queuer->ue->id : -1, + ui->queuer ? ui->queuer->ue->ino : -1, + ui->ue->peer, (int)ui->ue->name.len, + ui->ue->name.data, ui->name_dir ? 
ui->name_dir : "-"); break; } } From 6ee1ad6c23364e35df952031b673729f4a41bb64 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 5 Sep 2018 16:03:07 +0300 Subject: [PATCH 269/277] unix: break_connected -- Add a log recod For better debug Signed-off-by: Cyrill Gorcunov --- criu/sk-unix.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/criu/sk-unix.c b/criu/sk-unix.c index e02dda85d9..b57f1c9fdd 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -1850,6 +1850,8 @@ static int break_connected(struct unix_sk_info *ui, int sk) { if (ui->ue->type == SOCK_DGRAM) { struct sockaddr_un addr = { .sun_family = AF_UNSPEC }; + pr_debug("Break connected id %#x ino %d\n", + ui->ue->id, ui->ue->ino); /* * socketpair() assigns sks[1] as a peer of sks[0] * (and vice versa). But in this case (not zero peer) From 32e74c25ed01365c49a79446bda9c6cd3c7a7d41 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 5 Sep 2018 16:24:01 +0300 Subject: [PATCH 270/277] unix: make_socket -- Add more detailed logs Signed-off-by: Cyrill Gorcunov --- criu/sk-unix.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/criu/sk-unix.c b/criu/sk-unix.c index b57f1c9fdd..e2b4919250 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -1875,6 +1875,8 @@ static int make_socket(struct unix_sk_info *ui, int sks[2], bool pair, bool disj if (unlikely(ui->flags & USK_BINDMOUNT)) { sks[0] = fdstore_get(ui->fdstore_mnt_id[0]); sks[1] = fdstore_get(ui->fdstore_mnt_id[1]); + pr_debug("bindmount: Fetch socket pair id %#x ino %d\n", + ui->ue->id, ui->ue->ino); if (sks[0] < 0 || sks[1] < 0) { pr_err("bindmount: Can't fetch id %#x socketpair from the store\n", ui->ue->id); @@ -1885,10 +1887,15 @@ static int make_socket(struct unix_sk_info *ui, int sks[2], bool pair, bool disj sks[0] = sks[1] = -1; if (!pair) { + pr_debug("Create socket id %#x ino %d\n", + ui->ue->id, ui->ue->ino); ret = socket(PF_UNIX, ui->ue->type, 0); sks[0] = ret; - } else + } else { + pr_debug("Create socket pair id 
%#x ino %d\n", + ui->ue->id, ui->ue->ino); ret = socketpair(PF_UNIX, ui->ue->type, 0, sks); + } if (ret < 0) { pr_perror("Can't create %s id %#x\n", From 7f932194e9ef15c44823ee0255a3fb767a445c05 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 5 Sep 2018 16:56:34 +0300 Subject: [PATCH 271/277] unix: make_socket -- Break connected pair if requested No need to check for pair argument, it is rather confusing. Signed-off-by: Cyrill Gorcunov --- criu/sk-unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/sk-unix.c b/criu/sk-unix.c index e2b4919250..453750bf53 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -1905,7 +1905,7 @@ static int make_socket(struct unix_sk_info *ui, int sks[2], bool pair, bool disj } } - if (disjoin_master && pair) + if (disjoin_master) return break_connected(ui, sks[0]); return 0; From 2f9a89f6ece71e04167d1c0a238880dc39ea3dbe Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 14 Sep 2018 15:20:02 +0300 Subject: [PATCH 272/277] unix: bind_unix_sk -- Add ability to skip waiters notification Currently bind_unix_sk is used in two contexts: to bind freshly created socket pairs and to bind name for sockets which are to be queued into fdstore (binmounted sockets). For first case we should notify the waiting side immediately but in turn bindmount sockets are created early and there might be the case where peers are not yet even opened and notification may simply lost or even cause sigsegv since file list is yet empty. In turn we should defer it until we do a real bindmount socket opening right after we fetched it from the fdstore. 
https://jira.sw.ru/browse/PSBM-88274 Signed-off-by: Cyrill Gorcunov --- criu/sk-unix.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/criu/sk-unix.c b/criu/sk-unix.c index 453750bf53..80d6e3360b 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -1646,7 +1646,7 @@ static int bind_on_deleted(int sk, struct unix_sk_info *ui) return 0; } -static int bind_unix_sk(int sk, struct unix_sk_info *ui) +static int bind_unix_sk(int sk, struct unix_sk_info *ui, bool notify) { struct sockaddr_un addr; int cwd_fd = -1, root_fd = -1, ns_fd = -1; @@ -1705,8 +1705,8 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui) goto done; } - if (ui->ue->state != TCP_LISTEN) { - ui->bound = 1; + if (notify && ui->ue->state != TCP_LISTEN) { + ui->bound = true; wake_connected_sockets(ui); } @@ -1793,10 +1793,10 @@ static int open_unixsk_pair_master(struct unix_sk_info *ui, int *new_fd) } sk[1] = fle_peer->fe->fd; - if (bind_unix_sk(sk[0], ui)) + if (bind_unix_sk(sk[0], ui, true)) return -1; - if (bind_unix_sk(sk[1], peer)) + if (bind_unix_sk(sk[1], peer, true)) return -1; *new_fd = sk[0]; @@ -2008,10 +2008,13 @@ static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd) } if (!(ui->ue->uflags & UNIX_UFLAGS__BINDMOUNT)) { - if (bind_unix_sk(sks[0], ui)) { + if (bind_unix_sk(sks[0], ui, true)) { close(sks[0]); return -1; } + } else { + ui->bound = true; + wake_connected_sockets(ui); } if (ui->ue->state == TCP_LISTEN) { @@ -2371,7 +2374,7 @@ int unix_prepare_bindmount(struct mount_info *mi) goto out; } - if (bind_unix_sk(sks[0], ui)) + if (bind_unix_sk(sks[0], ui, false)) goto out; ui->fdstore_mnt_id[0] = fdstore_add(sks[0]); From 9581911d3abb9e886786e2ccaddc71b699263438 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 6 Sep 2018 19:53:47 +0300 Subject: [PATCH 273/277] unix: unlink_sk -- Don't unlink bindmounted sockets The unlink procedure is rather a cleanup before we start creating new sockets, but bindmounted sockets are 
pre-created early so we should not touch them. Signed-off-by: Cyrill Gorcunov --- criu/sk-unix.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/criu/sk-unix.c b/criu/sk-unix.c index 80d6e3360b..9b46dba40e 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -2097,7 +2097,9 @@ static int unlink_sk(struct unix_sk_info *ui) { int ret = 0, cwd_fd = -1, root_fd = -1, ns_fd = -1; - if (!ui->name || ui->name[0] == '\0' || (ui->ue->uflags & UNIX_UFLAGS__EXTERN)) + if (!ui->name || ui->name[0] == '\0' || + (ui->flags & USK_BINDMOUNT) || + (ui->ue->uflags & UNIX_UFLAGS__EXTERN)) return 0; if (prep_unix_sk_cwd(ui, &cwd_fd, &root_fd, NULL)) From 726f6b0eb1dcfc3d0f47525baebfa8135342d5a3 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 6 Sep 2018 19:58:04 +0300 Subject: [PATCH 274/277] unix: unix_prepare_bindmount -- Allow to connect via relative name Once socket is bounded we should allow to connect to us via relative name. https://jira.sw.ru/browse/PSBM-88274 Signed-off-by: Cyrill Gorcunov --- criu/sk-unix.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/criu/sk-unix.c b/criu/sk-unix.c index 9b46dba40e..e5345ed74c 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -2398,6 +2398,12 @@ int unix_prepare_bindmount(struct mount_info *mi) goto out; } + /* + * Once we are pre-created and bounded, clear + * the USK_NOCWD flag so other sockets migh connect + * to us via relative name. + */ + ui->flags &= ~USK_NOCWD; ret = 0; out: close_safe(&prev_cwd_fd); From 95c2c84461498c257a4183d971c4a4b0bdc169f0 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 6 Sep 2018 11:59:31 +0300 Subject: [PATCH 275/277] test: bind-mount-unix -- Implement real connect Also full rework to make sure we can test the situation where client is not yet opened when we're restoring bindmount. 
Signed-off-by: Cyrill Gorcunov --- test/zdtm/static/bind-mount-unix.c | 98 +++++++++++++++++++++--------- 1 file changed, 68 insertions(+), 30 deletions(-) diff --git a/test/zdtm/static/bind-mount-unix.c b/test/zdtm/static/bind-mount-unix.c index 2981c408a1..7f649ed70d 100644 --- a/test/zdtm/static/bind-mount-unix.c +++ b/test/zdtm/static/bind-mount-unix.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -25,12 +26,16 @@ int main(int argc, char **argv) int sk = -1, skc = -1, ret = 1, fd; struct sockaddr_un addr; unsigned int addrlen; + task_waiter_t t; struct stat st; + int status; + pid_t pid; char buf[] = "123456"; char rbuf[sizeof(buf)]; test_init(argc, argv); + task_waiter_init(&t); mkdir(dirname, 0700); if (mount("none", dirname, "tmpfs", 0, NULL)) { @@ -47,7 +52,7 @@ int main(int argc, char **argv) fd = open(path_bind, O_RDONLY | O_CREAT); if (fd < 0) { pr_perror("Can't open %s", path_bind); - goto err; + return 1; } close(fd); @@ -58,76 +63,108 @@ int main(int argc, char **argv) sk = socket(AF_UNIX, SOCK_DGRAM, 0); if (sk < 0) { pr_perror("Can't create socket %s", path_unix); - goto err; + return 1; } ret = bind(sk, (struct sockaddr *)&addr, addrlen); if (ret) { pr_perror("Can't bind socket %s", path_unix); - goto err; + return 1; } if (stat(path_unix, &st) == 0) { test_msg("path %s st.st_ino %#lx st.st_mode 0%o (sock %d)\n", path_unix, (unsigned long)st.st_ino, (int)st.st_mode, !!S_ISSOCK(st.st_mode)); - } else + } else { pr_perror("Can't stat on %s", path_unix); + return 1; + } if (mount(path_unix, path_bind, NULL, MS_BIND | MS_REC, NULL)) { pr_perror("Unable to bindmount %s -> %s", path_unix, path_bind); - goto err; + return 1; } if (stat(path_unix, &st) == 0) { test_msg("path %s st.st_dev %#x st.st_rdev %#x st.st_ino %#lx st.st_mode 0%o (sock %d)\n", path_unix, (int)st.st_dev, (int)st.st_rdev, (unsigned long)st.st_ino, (int)st.st_mode, !!S_ISSOCK(st.st_mode)); - } else + } else { pr_perror("Can't stat on %s", path_unix); 
+ return 1; + } if (stat(path_bind, &st) == 0) { test_msg("path %s st.st_dev %#x st.st_rdev %#x st.st_ino %#lx st.st_mode 0%o (sock %d)\n", path_bind, (int)st.st_dev, (int)st.st_rdev, (unsigned long)st.st_ino, (int)st.st_mode, !!S_ISSOCK(st.st_mode)); - } else + } else { pr_perror("Can't stat on %s", path_bind); + return 1; + } - test_daemon(); - test_waitsig(); - - skc = socket(AF_UNIX, SOCK_DGRAM, 0); - if (skc < 0) { - pr_perror("Can't create client socket"); - goto err; + pid = test_fork(); + if (pid < 0) { + pr_perror("Can't fork"); + return 1; + } else if (pid == 0) { + skc = socket(AF_UNIX, SOCK_DGRAM, 0); + if (skc < 0) { + pr_perror("Can't create client socket"); + _exit(1); + } + + addr.sun_family = AF_UNIX; + sstrncpy(addr.sun_path, path_bind); + addrlen = sizeof(addr.sun_family) + strlen(path_bind); + + ret = connect(skc, (struct sockaddr *)&addr, addrlen); + if (ret) { + pr_perror("Can't connect\n"); + _exit(1); + } else + test_msg("Connected to %s", addr.sun_path); + + task_waiter_complete(&t, 1); + task_waiter_wait4(&t, 2); + + ret = sendto(skc, buf, sizeof(buf), 0, (struct sockaddr *)&addr, addrlen); + if (ret != (int)sizeof(buf)) { + pr_perror("Can't send data on client"); + _exit(1); + } + + close(skc); + _exit(0); } - addr.sun_family = AF_UNIX; - sstrncpy(addr.sun_path, path_bind); - addrlen = sizeof(addr.sun_family) + strlen(path_bind); + task_waiter_wait4(&t, 1); - ret = sendto(skc, buf, sizeof(buf), 0, (struct sockaddr *)&addr, addrlen); - if (ret != (int)sizeof(buf)) { - pr_perror("Can't send data on client"); - ret = 1; - goto err; - } + test_daemon(); + test_waitsig(); + + task_waiter_complete(&t, 2); ret = read(sk, rbuf, sizeof(rbuf)); if (ret < 0) { - pr_perror("Can't read data"); - ret = 1; + fail("Can't read data"); goto err; } if (ret != sizeof(buf) || memcmp(buf, rbuf, sizeof(buf))) { - pr_err("Data mismatch"); - ret = 1; + fail("Data mismatch"); goto err; } - pass(); - ret = 0; + ret = wait(&status); + if (ret == -1 || 
!WIFEXITED(status) || WEXITSTATUS(status)) { + kill(pid, SIGKILL); + fail("Unable to wait child"); + } else { + ret = 0; + pass(); + } err: umount2(path_bind, MNT_DETACH); @@ -135,5 +172,6 @@ int main(int argc, char **argv) unlink(path_bind); unlink(path_unix); close(sk); - return ret; + + return ret ? 1 : 0; } From 2aa32b34f47eb546cb4a3c723adc865d0c38c043 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 27 Sep 2018 16:12:00 +0300 Subject: [PATCH 276/277] unix: Don't forget to close ns descriptor on error path Signed-off-by: Cyrill Gorcunov --- criu/sk-unix.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/criu/sk-unix.c b/criu/sk-unix.c index e5345ed74c..10bcdaac61 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -1284,8 +1284,10 @@ static int prep_unix_sk_cwd(struct unix_sk_info *ui, int *prev_cwd_fd, if (ns_fd < 0) return -1; - if (switch_ns_by_fd(ns_fd, &mnt_ns_desc, prev_mntns_fd)) + if (switch_ns_by_fd(ns_fd, &mnt_ns_desc, prev_mntns_fd)) { + close(ns_fd); return -1; + } set_proc_self_fd(-1); close(ns_fd); From bdc533e80ca7aaffcc0912b7f442d98f3a0312a4 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Tue, 2 Oct 2018 11:55:22 +0300 Subject: [PATCH 277/277] unix: Disable switching mount namespace Previously we always created sockets in root mount namespace but in 019ebec03ea2bae127bd2eedcb70a22c32f05d49 we've tried to resolve this problem setting up proper mount namespace. This is not always possible though: the root task from which we trying to switch ns might already have a number of children with CLONE_FS|CLONE_FILES set and kernel doesn't allow to do that. Lets disable this ability until we find a proper solution. 
https://jira.sw.ru/browse/PSBM-89126 Signed-off-by: Cyrill Gorcunov --- criu/sk-unix.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/criu/sk-unix.c b/criu/sk-unix.c index 10bcdaac61..c9f30abd42 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -1271,6 +1271,23 @@ static int prep_unix_sk_cwd(struct unix_sk_info *ui, int *prev_cwd_fd, if (ui->flags & USK_NOCWD) return 0; + /* + * To change mount namespace we should have fs->user = 1 + * (see fs/namespace.c:mntns_install) but this is not + * usually possible since main criu process already may + * has forked() with CLONE_FS | CLONE_FILES and fs->user + * is a way bigger. + * + * For now simply switch to old scheme where all sockets + * are restored in root mount namespace. + * + * FIXME: Need to revisit later. + */ + if (prev_mntns_fd && ui->name[0] && ui->ue->mnt_id >= 0) { + *prev_mntns_fd = -1; + prev_mntns_fd = NULL; + } + if (prev_mntns_fd && ui->name[0] && ui->ue->mnt_id >= 0) { struct ns_id *mntns = lookup_nsid_by_mnt_id(ui->ue->mnt_id); int ns_fd;