Skip to content

Commit 0683fec

Browse files
committed
checkpoint: add pre-dump support
CRIU supports the concept of pre-copy migration. Instead of creating a complete checkpoint of a process, it is possible to write only the memory of the process to disk while the process keeps on running. The first memory-only checkpoint can then be transferred to the migration destination. During the transfer time it is possible to take a second checkpoint which will only write the memory pages to disk that have changed since the previous checkpoint. This way the second checkpoint can be much smaller while the process keeps on running, which also means that the amount of data which needs to be transferred to the migration destination might be smaller and thus the migration downtime can be reduced. This only makes sense if the number of memory pages which are changing is rather small. There is no limit on the number of pre-copy iterations. This commit takes the interface as implemented in runc and implements it for crun. Podman already uses the pre-dump as implemented by runc. This commit also makes sure that the underlying software stack supports the pre-dump mechanism. CRIU uses the kernel's dirty page tracking, which is not available on all architectures (aarch64 does not implement it) and might not be enabled in the kernel. If the user wants to use pre-dump on a system without dirty page tracking, crun will fail early and inform the user. This crun pre-dump implementation relies on libcriu interfaces which are not yet part of the latest release (3.16.1). So at least 3.16.2 or 3.17 is required to use pre-dump in combination with crun. Signed-off-by: Adrian Reber <areber@redhat.com>
1 parent 94b209b commit 0683fec

File tree

5 files changed

+120
-23
lines changed

5 files changed

+120
-23
lines changed

configure.ac

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -133,12 +133,20 @@ AC_ARG_ENABLE([criu], AS_HELP_STRING([--disable-criu], [Disable CRIU based check
133133
AS_IF([test "x$enable_criu" != "xno"], [
134134
PKG_CHECK_MODULES([CRIU], [criu >= 3.15], [have_criu="yes"], [have_criu="no"
135135
AC_MSG_NOTICE([CRIU headers not found, building without CRIU support])])
136-
PKG_CHECK_MODULES([CRIU], [criu > 3.16], [have_criu_join_ns="yes"], [have_criu_join_ns="no"
137-
AC_MSG_NOTICE([CRIU version doesn't support join-ns API])])
138-
AS_IF([test "$have_criu" = "yes"], [
139-
AC_DEFINE([HAVE_CRIU], 1, [Define if CRIU is available])
140-
AC_SEARCH_LIBS(criu_init_opts, [criu])
141-
])
136+
PKG_CHECK_MODULES([CRIU_JOIN_NS], [criu > 3.16], [have_criu_join_ns="yes"], [have_criu_join_ns="no"
137+
AC_MSG_NOTICE([CRIU version doesn't support join-ns API])])
138+
PKG_CHECK_MODULES([CRIU_PRE_DUMP], [criu > 3.16.1], [have_criu_pre_dump="yes"], [have_criu_pre_dump="no"
139+
AC_MSG_NOTICE([CRIU version doesn't support for pre-dumping])])
140+
AS_IF([test "$have_criu" = "yes"], [
141+
AC_DEFINE([HAVE_CRIU], 1, [Define if CRIU is available])
142+
AC_SEARCH_LIBS(criu_init_opts, [criu])
143+
])
144+
AS_IF([test "$have_criu_join_ns" = "yes"], [
145+
AC_DEFINE([CRIU_JOIN_NS_SUPPORT], 1, [Define if CRIU join NS support is available])
146+
])
147+
AS_IF([test "$have_criu_pre_dump" = "yes"], [
148+
AC_DEFINE([CRIU_PRE_DUMP_SUPPORT], 1, [Define if CRIU pre-dump support is available])
149+
])
142150
], [AC_MSG_NOTICE([CRIU support disabled per user request])])
143151

144152
FOUND_LIBS=$LIBS
@@ -183,7 +191,6 @@ AC_SEARCH_LIBS([argp_parse], [argp], [], [AC_MSG_ERROR([*** argp functions not f
183191

184192
AM_CONDITIONAL([PYTHON_BINDINGS], [test "x$with_python_bindings" = "xyes"])
185193
AM_CONDITIONAL([CRIU_SUPPORT], [test "x$have_criu" = "xyes"])
186-
AM_CONDITIONAL([CRIU_JOIN_NS_SUPPORT], [test "x$have_criu_join_ns" = "xyes"])
187194

188195
AC_CONFIG_FILES([Makefile rpm/crun.spec])
189196

src/checkpoint.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ enum
4141
OPTION_SHELL_JOB,
4242
OPTION_EXT_UNIX_SK,
4343
OPTION_FILE_LOCKS,
44+
OPTION_PARENT_PATH,
45+
OPTION_PRE_DUMP,
4446
};
4547

4648
static char doc[] = "OCI runtime";
@@ -55,6 +57,10 @@ static struct argp_option options[]
5557
{ "ext-unix-sk", OPTION_EXT_UNIX_SK, 0, 0, "allow external unix sockets", 0 },
5658
{ "shell-job", OPTION_SHELL_JOB, 0, 0, "allow shell jobs", 0 },
5759
{ "file-locks", OPTION_FILE_LOCKS, 0, 0, "allow file locks", 0 },
60+
#ifdef CRIU_PRE_DUMP_SUPPORT
61+
{ "parent-path", OPTION_PARENT_PATH, "DIR", 0, "path for previous criu image files in pre-dump", 0 },
62+
{ "pre-dump", OPTION_PRE_DUMP, 0, 0, "dump container's memory information only, leave the container running after this", 0 },
63+
#endif
5864
{
5965
0,
6066
} };
@@ -77,6 +83,14 @@ parse_opt (int key, char *arg arg_unused, struct argp_state *state arg_unused)
7783
cr_options.work_path = argp_mandatory_argument (arg, state);
7884
break;
7985

86+
case OPTION_PARENT_PATH:
87+
cr_options.parent_path = argp_mandatory_argument (arg, state);
88+
break;
89+
90+
case OPTION_PRE_DUMP:
91+
cr_options.pre_dump = true;
92+
break;
93+
8094
case OPTION_LEAVE_RUNNING:
8195
cr_options.leave_running = true;
8296
break;

src/libcrun/container.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3563,7 +3563,7 @@ libcrun_container_checkpoint (libcrun_context_t *context, const char *id, libcru
35633563
if (UNLIKELY (ret < 0))
35643564
return ret;
35653565

3566-
if (! cr_options->leave_running)
3566+
if (! (cr_options->leave_running || cr_options->pre_dump))
35673567
return container_delete_internal (context, NULL, id, true, true, err);
35683568

35693569
return 0;

src/libcrun/container.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,8 @@ struct libcrun_checkpoint_restore_s
9696
bool detach;
9797
bool file_locks;
9898
const char *console_socket;
99+
char *parent_path;
100+
bool pre_dump;
99101
};
100102
typedef struct libcrun_checkpoint_restore_s libcrun_checkpoint_restore_t;
101103

src/libcrun/criu.c

Lines changed: 89 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,39 @@ criu_notify (char *action, __attribute__ ((unused)) criu_notify_arg_t na)
7575
return 0;
7676
}
7777

78+
# ifdef CRIU_PRE_DUMP_SUPPORT
79+
80+
static int
81+
criu_check_mem_track (char *work_path, libcrun_error_t *err)
82+
{
83+
struct criu_feature_check features = { 0 };
84+
int ret;
85+
86+
/* Right now we are only interested in checking memory tracking.
87+
* Memory tracking can be disabled at different levels. aarch64
88+
* for example has memory tracking not implemented. It could also
89+
* be not enabled on other architectures. Just ask CRIU if that
90+
* feature exists. */
91+
92+
features.mem_track = true;
93+
94+
ret = criu_feature_check (&features, sizeof (features));
95+
96+
if (UNLIKELY (ret < 0))
97+
return crun_make_error (err, 0,
98+
"CRIU feature checking failed %d. Please check CRIU logfile %s/%s",
99+
ret, work_path, CRIU_CHECKPOINT_LOG_FILE);
100+
101+
if (features.mem_track == true)
102+
return 1;
103+
104+
return crun_make_error (err, 0,
105+
"Memory tracking not supported. Please check CRIU logfile %s/%s",
106+
work_path, CRIU_CHECKPOINT_LOG_FILE);
107+
}
108+
109+
# endif
110+
78111
static int
79112
restore_cgroup_v1_mount (runtime_spec_schema_config_schema *def, libcrun_error_t *err)
80113
{
@@ -265,15 +298,14 @@ libcrun_container_checkpoint_linux_criu (libcrun_container_status_t *status, lib
265298

266299
criu_set_images_dir_fd (image_fd);
267300

268-
/* descriptors.json is needed during restore to correctly
269-
* reconnect stdin, stdout, stderr. */
270-
ret = append_paths (&descriptors_path, err, cr_options->image_path, DESCRIPTORS_FILENAME, NULL);
271-
if (UNLIKELY (ret < 0))
272-
return ret;
273-
274-
ret = write_file (descriptors_path, status->external_descriptors, strlen (status->external_descriptors), err);
275-
if (UNLIKELY (ret < 0))
276-
return crun_error_wrap (err, "error saving CRIU descriptors file");
301+
/* Set up logging. */
302+
criu_set_log_level (4);
303+
criu_set_log_file (CRIU_CHECKPOINT_LOG_FILE);
304+
/* Setting the pid early as we can skip a lot of checkpoint setup if
305+
* we just do a pre-dump. The PID needs to be set always. Do it here.
306+
* The main process of the container is the process CRIU will checkpoint
307+
* and all of its children. */
308+
criu_set_pid (status->pid);
277309

278310
/* work_dir is the place CRIU will put its logfiles. If not explicitly set,
279311
* CRIU will put the logfiles into the images_dir from above. No need for
@@ -292,9 +324,54 @@ libcrun_container_checkpoint_linux_criu (libcrun_container_status_t *status, lib
292324
cr_options->work_path = cr_options->image_path;
293325
}
294326

295-
/* The main process of the container is the process CRIU will checkpoint
296-
* and all of its children. */
297-
criu_set_pid (status->pid);
327+
# ifdef CRIU_PRE_DUMP_SUPPORT
328+
329+
{
330+
int criu_can_mem_track = 0;
331+
/* If the user uses --pre-dump for the second time or does
332+
* a final dump from a previous pre-dump, setting parent_path
333+
* is necessary so that CRIU can find which pages have not
334+
* changed compared to the previous dump. */
335+
if (cr_options->parent_path != NULL)
336+
{
337+
criu_can_mem_track = criu_check_mem_track (cr_options->work_path, err);
338+
if (UNLIKELY (criu_can_mem_track == -1))
339+
return -1;
340+
criu_set_track_mem (true);
341+
/* The parent path needs to be a relative path. CRIU will fail
342+
* if the path is not in the right format. Usually something like
343+
* ../previous-dump */
344+
criu_set_parent_images (cr_options->parent_path);
345+
}
346+
347+
if (cr_options->pre_dump)
348+
{
349+
if (criu_can_mem_track != 1)
350+
{
351+
criu_can_mem_track = criu_check_mem_track (cr_options->work_path, err);
352+
if (UNLIKELY (criu_can_mem_track == -1))
353+
return -1;
354+
}
355+
criu_set_track_mem (true);
356+
ret = criu_pre_dump ();
357+
if (UNLIKELY (ret != 0))
358+
return crun_make_error (err, 0,
359+
"CRIU pre-dump failed %d. Please check CRIU logfile %s/%s",
360+
ret, cr_options->work_path, CRIU_CHECKPOINT_LOG_FILE);
361+
return 0;
362+
}
363+
}
364+
# endif
365+
366+
/* descriptors.json is needed during restore to correctly
367+
* reconnect stdin, stdout, stderr. */
368+
ret = append_paths (&descriptors_path, err, cr_options->image_path, DESCRIPTORS_FILENAME, NULL);
369+
if (UNLIKELY (ret < 0))
370+
return ret;
371+
372+
ret = write_file (descriptors_path, status->external_descriptors, strlen (status->external_descriptors), err);
373+
if (UNLIKELY (ret < 0))
374+
return crun_error_wrap (err, "error saving CRIU descriptors file");
298375

299376
ret = append_paths (&path, err, status->bundle, status->rootfs, NULL);
300377
if (UNLIKELY (ret < 0))
@@ -413,9 +490,6 @@ libcrun_container_checkpoint_linux_criu (libcrun_container_status_t *status, lib
413490
criu_set_orphan_pts_master (true);
414491
criu_set_manage_cgroups (true);
415492

416-
/* Set up logging. */
417-
criu_set_log_level (4);
418-
criu_set_log_file (CRIU_CHECKPOINT_LOG_FILE);
419493
ret = criu_dump ();
420494
if (UNLIKELY (ret != 0))
421495
return crun_make_error (err, 0,

0 commit comments

Comments
 (0)