Skip to content

Commit

Permalink
Process Migration using Sockets (p1)
Browse files Browse the repository at this point in the history
This patch introduces the --remote option and the necessary code changes to
support it. This leaves the user the option to decide if the checkpoint data is to
be stored on disk or sent through the network (through the image-proxy).
The latter forwards the data to the destination node where image-cache
receives it.

The overall communication is performed as follows:
src_node CRIU dump -> (sends images through UNIX sockets) ->      image-proxy
								       |
								       V
dst_node: CRIU restore <- (receives images through UNIX sockets)<- image-cache

Communication between image-proxy and image-cache is done through a single
TCP connection.

Running criu with --remote option is like this:

dst_node# criu image-cache -d --port <port> -o /tmp/image-cache.log
dst_node# criu restore --remote -o /tmp/image-cache.log
src_node# criu image-proxy -d --port <port> --address <dst_node> -o /tmp/image-proxy.log
src_node# criu dump -t <pid> --remote -o /tmp/dump.log

    [ xemul:
here's the list of what should be done with the cache/proxy
in order to have them merged into master.

0. Document the whole thing :)
   Please, add articles for newly introduced actions and options to
   https://criu.org/CLI page.
   Also, it would be good to have an article describing the protocols
   involved.

1. Make the unix sockets reside in work-dir.
   The good thing is that we've got rid of the socket name option :)
   But looking at do_open_remote_image() I see that it fchdir-s to
   image dir before connecting to proxy/cache. Better solution is to
   put the socket into workdir.

   1a. After this the option -D|--images-dir should become optional.
       Provided the --remote is given CRIU should work purely on the
       work-dir and not generate anything in the images-dir.

2. Tune up the image_cache and image_proxy commands to accept the
   --status-fd and --pidfile options.
   Presumably the very cr_daemon() call should be equipped with
   everything that should be done for daemonizing and proxy/cache
   tasks should just call it :)

3. Fix local connections not to generate per-image threads. There
   can be many images and it's not nice to stress the system with
   such amount of threads. Please, look at how criu/uffd.c manages
   multiple descriptors with page-faults using the epoll stuff.

   3a. The accept_remote_image_connections() seems not to work well
       with opts.ps_socket scenario as the former just calls accept()
       on whatever socket is passed there, while the opts.ps_socket
       is already an established socket for data transfer.

4. No strings in protocol. Now the hard-coded "RESTORE_FINISH" string
   (and DUMP_FINISHED one) is used to terminate the communication.
   Need to tune up the protobuf objects to send boolean (or integer)
   EOF sign rather than the string.

5. Check how proxy/cache works with incremental dumps. Looking at the
   skip_remote_bytes() I think that image-cache and -proxy still do not
   work well with stacked pages images. Probably for those we'll need
   the page-server or lazy-pages -like protocol that would request the
   needed regions and receive it back rather than read bytes from
   sockets simply to skip those.

6. Add support for cache/proxy into go-phaul code. I haven't yet finished
   with the prototype, but plan to do it soon, so once the above steps
   are done we'll be able to proceed with this one.

]

Signed-off-by: Rodrigo Bruno <rbruno@gsd.inesc-id.pt>
Signed-off-by: Katerina Koukiou <k.koukiou@gmail.com>
Signed-off-by: Pavel Emelyanov <xemul@virtuozzo.com>
  • Loading branch information
rodrigo-bruno authored and avagin committed Jul 10, 2018
1 parent e73abc4 commit a14f0d0
Show file tree
Hide file tree
Showing 15 changed files with 550 additions and 21 deletions.
4 changes: 4 additions & 0 deletions criu/Makefile.crtools
Expand Up @@ -31,6 +31,10 @@ obj-y += files-reg.o
obj-y += fsnotify.o
obj-y += image-desc.o
obj-y += image.o
obj-y += img-remote.o
obj-y += img-proxy.o
obj-y += img-cache.o
obj-y += img-remote-proto.o
obj-y += ipc_ns.o
obj-y += irmap.o
obj-y += kcmp-ids.o
Expand Down
16 changes: 16 additions & 0 deletions criu/cr-dump.c
Expand Up @@ -82,6 +82,7 @@
#include "seize.h"
#include "fault-injection.h"
#include "dump.h"
#include "img-remote.h"

/*
* Architectures can overwrite this function to restore register sets that
Expand Down Expand Up @@ -1542,6 +1543,11 @@ int cr_pre_dump_tasks(pid_t pid)
struct pstree_item *item;
int ret = -1;

if (opts.remote && push_snapshot_id() < 0) {
pr_err("Failed to push image namespace.\n");
goto err;
}

root_item = alloc_pstree_item();
if (!root_item)
goto err;
Expand Down Expand Up @@ -1716,6 +1722,11 @@ static int cr_dump_finish(int ret)

close_service_fd(CR_PROC_FD_OFF);

if (opts.remote && (finish_remote_dump() < 0)) {
pr_err("Finish remote dump failed.\n");
return post_dump_ret ? : 1;
}

if (ret) {
pr_err("Dumping FAILED.\n");
} else {
Expand All @@ -1737,6 +1748,11 @@ int cr_dump_tasks(pid_t pid)
pr_info("Dumping processes (pid: %d)\n", pid);
pr_info("========================================\n");

if (opts.remote && push_snapshot_id() < 0) {
pr_err("Failed to push image namespace.\n");
goto err;
}

root_item = alloc_pstree_item();
if (!root_item)
goto err;
Expand Down
6 changes: 6 additions & 0 deletions criu/cr-restore.c
Expand Up @@ -31,6 +31,7 @@
#include "cr_options.h"
#include "servicefd.h"
#include "image.h"
#include "img-remote.h"
#include "util.h"
#include "util-pie.h"
#include "criu-log.h"
Expand Down Expand Up @@ -2389,6 +2390,11 @@ int cr_restore_tasks(void)
goto err;

ret = restore_root_task(root_item);

if (opts.remote && (finish_remote_restore() < 0)) {
pr_err("Finish remote restore failed.\n");
goto err;
}
err:
cr_plugin_fini(CR_PLUGIN_STAGE__RESTORE, ret);
return ret;
Expand Down
14 changes: 14 additions & 0 deletions criu/crtools.c
Expand Up @@ -56,6 +56,7 @@

#include "setproctitle.h"
#include "sysctl.h"
#include "img-remote.h"

struct cr_options opts;

Expand Down Expand Up @@ -317,6 +318,7 @@ int main(int argc, char *argv[], char *envp[])
BOOL_OPT(SK_CLOSE_PARAM, &opts.tcp_close),
{ "verbosity", optional_argument, 0, 'v' },
{ "ps-socket", required_argument, 0, 1091},
BOOL_OPT("remote", &opts.remote),
{ },
};

Expand Down Expand Up @@ -793,6 +795,12 @@ int main(int argc, char *argv[], char *envp[])
if (!strcmp(argv[optind], "page-server"))
return cr_page_server(opts.daemon_mode, false, -1) != 0;

if (!strcmp(argv[optind], "image-cache"))
return image_cache(opts.daemon_mode, DEFAULT_CACHE_SOCKET, opts.port);

if (!strcmp(argv[optind], "image-proxy"))
return image_proxy(opts.daemon_mode, DEFAULT_PROXY_SOCKET, opts.addr, opts.port);

if (!strcmp(argv[optind], "service"))
return cr_service(opts.daemon_mode);

Expand Down Expand Up @@ -821,6 +829,8 @@ int main(int argc, char *argv[], char *envp[])
" criu service [<options>]\n"
" criu dedup\n"
" criu lazy-pages -D DIR [<options>]\n"
" criu image-cache [<options>]\n"
" criu image-proxy [<options>]\n"
"\n"
"Commands:\n"
" dump checkpoint a process/tree identified by pid\n"
Expand All @@ -832,6 +842,8 @@ int main(int argc, char *argv[], char *envp[])
" dedup remove duplicates in memory dump\n"
" cpuinfo dump writes cpu information into image file\n"
" cpuinfo check validates cpu information read from image file\n"
" image-proxy launch dump-side proxy to sent images\n"
" image-cache launch restore-side cache to reveive images\n"
);

if (usage_error) {
Expand Down Expand Up @@ -884,6 +896,8 @@ int main(int argc, char *argv[], char *envp[])
" macvlan[IFNAME]:OUTNAME\n"
" mnt[COOKIE]:ROOT\n"
"\n"
" --remote dump/restore images directly to/from remote node using\n"
" image-proxy/image-cache\n"
"* Special resources support:\n"
" --" SK_EST_PARAM " checkpoint/restore established TCP connections\n"
" --" SK_INFLIGHT_PARAM " skip (ignore) in-flight TCP connections\n"
Expand Down
47 changes: 45 additions & 2 deletions criu/image.c
Expand Up @@ -17,6 +17,7 @@
#include "images/inventory.pb-c.h"
#include "images/pagemap.pb-c.h"
#include "proc_parse.h"
#include "img-remote.h"

bool ns_per_id = false;
bool img_common_magic = true;
Expand Down Expand Up @@ -361,13 +362,53 @@ static int img_write_magic(struct cr_img *img, int oflags, int type)
return write_img(img, &imgset_template[type].magic);
}

/*
 * Open an image over a remote (image-proxy / image-cache) connection
 * instead of the local filesystem.
 *
 * @dfd:   descriptor handed to get_snapshot_id_from_idx() to look up the
 *         snapshot id the image belongs to
 * @path:  image name forwarded to the remote connection helpers
 * @flags: open flags; a value of exactly O_RDONLY selects the read
 *         connection, any other value selects the write connection
 *
 * Returns the value produced by read_remote_image_connection() or
 * write_remote_image_connection(), or -1 if the snapshot id cannot be
 * resolved or the working directory cannot be saved, switched or restored.
 */
int do_open_remote_image(int dfd, char *path, int flags)
{
	char *snapshot_id;
	int ret, save;

	/*
	 * When using namespaces, the current dir is changed so we need to
	 * change to previous working dir and back to correctly open the image
	 * proxy and cache sockets.
	 *
	 * Remember the cwd with a plain descriptor: going through
	 * dirfd(opendir(".")) would leave a DIR stream behind on every call
	 * and would hand NULL to dirfd() if opendir() failed.
	 */
	save = open(".", O_RDONLY);
	if (save < 0) {
		pr_perror("Can't open current working directory");
		return -1;
	}

	if (fchdir(get_service_fd(IMG_FD_OFF)) < 0) {
		pr_debug("fchdir to dfd failed!\n");
		close(save);
		return -1;
	}

	snapshot_id = get_snapshot_id_from_idx(dfd);

	if (snapshot_id == NULL) {
		ret = -1;
	} else if (flags == O_RDONLY) {
		pr_debug("do_open_remote_image RDONLY path=%s snapshot_id=%s\n",
			 path, snapshot_id);
		ret = read_remote_image_connection(snapshot_id, path);
	} else {
		pr_debug("do_open_remote_image WDONLY path=%s snapshot_id=%s\n",
			 path, snapshot_id);
		ret = write_remote_image_connection(snapshot_id, path, O_WRONLY);
	}

	if (fchdir(save) < 0) {
		pr_debug("fchdir to save failed!\n");
		/*
		 * The caller only sees -1 here, so a connection that was
		 * already opened would otherwise be lost. Assumes a
		 * non-negative ret is a descriptor, as the caller uses it in
		 * place of openat()'s result.
		 */
		if (ret >= 0)
			close(ret);
		ret = -1;
	}
	close(save);

	return ret;
}

static int do_open_image(struct cr_img *img, int dfd, int type, unsigned long oflags, char *path)
{
int ret, flags;

flags = oflags & ~(O_NOBUF | O_SERVICE | O_FORCE_LOCAL);

ret = openat(dfd, path, flags, CR_FD_PERM);
if (opts.remote && !(oflags & O_FORCE_LOCAL))
ret = do_open_remote_image(dfd, path, flags);
else
ret = openat(dfd, path, flags, CR_FD_PERM);
if (ret < 0) {
if (!(flags & O_CREAT) && (errno == ENOENT || ret == -ENOENT)) {
pr_info("No %s image\n", path);
Expand Down Expand Up @@ -469,7 +510,9 @@ int open_image_dir(char *dir)
close(fd);
fd = ret;

if (opts.img_parent) {
if (opts.remote) {
init_snapshot_id(dir);
} else if (opts.img_parent) {
ret = symlinkat(opts.img_parent, fd, CR_PARENT_LINK);
if (ret < 0 && errno != EEXIST) {
pr_perror("Can't link parent snapshot");
Expand Down

0 comments on commit a14f0d0

Please sign in to comment.