From 9fe9d2211aab2e5442d4bfe5673fcfcc5a73f958 Mon Sep 17 00:00:00 2001 From: Volker Simonis Date: Sun, 14 Jan 2024 20:55:34 +0100 Subject: [PATCH] Don't dump pages which only contain zero bytes Signed-off-by: Volker Simonis --- criu/config.c | 1 + criu/crtools.c | 6 ++ criu/include/cr_options.h | 1 + criu/mem.c | 50 ++++++++++- test/javaTests/pom.xml | 1 + .../criu/java/tests/CheckpointRestore.java | 2 +- test/javaTests/test-zero.xml | 89 +++++++++++++++++++ 7 files changed, 146 insertions(+), 4 deletions(-) create mode 100644 test/javaTests/test-zero.xml diff --git a/criu/config.c b/criu/config.c index 1322a490ab..2077325960 100644 --- a/criu/config.c +++ b/criu/config.c @@ -650,6 +650,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd, { "ms", no_argument, 0, 1054 }, BOOL_OPT("track-mem", &opts.track_mem), BOOL_OPT("auto-dedup", &opts.auto_dedup), + BOOL_OPT("zero-pages", &opts.zero_pages), { "libdir", required_argument, 0, 'L' }, { "cpu-cap", optional_argument, 0, 1057 }, BOOL_OPT("force-irmap", &opts.force_irmap), diff --git a/criu/crtools.c b/criu/crtools.c index 94657f4186..29ca20a141 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -541,6 +541,12 @@ int main(int argc, char *argv[], char *envp[]) " pages images of previous dump\n" " when used on restore, as soon as page is restored, it\n" " will be punched from the image\n" + " --zero-pages don't dump pages containing only zero bytes. This is a\n" + " potentially expensive operation because it checks for\n" + " every single process page if it contains only zeros but\n" + " it can significantly decrease the image size if many\n" + " such pages exist. 
It effectively replaces such pages\n" + " with the kernel's zero-page on restore.\n" " --pre-dump-mode splice - parasite based pre-dumping (default)\n" " read - process_vm_readv syscall based pre-dumping\n" "\n" diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h index 60cf9437e6..e3e13b62b6 100644 --- a/criu/include/cr_options.h +++ b/criu/include/cr_options.h @@ -157,6 +157,7 @@ struct cr_options { int track_mem; char *img_parent; int auto_dedup; + int zero_pages; unsigned int cpu_cap; int force_irmap; char **exec_cmd; diff --git a/criu/mem.c b/criu/mem.c index 417e0a21de..7695bb0068 100644 --- a/criu/mem.c +++ b/criu/mem.c @@ -3,8 +3,10 @@ #include #include #include +#include #include #include +#include #include "types.h" #include "cr_options.h" @@ -31,6 +33,7 @@ #include "prctl.h" #include "compel/infect-util.h" #include "pidfd-store.h" +#include "xmalloc.h" #include "protobuf.h" #include "images/pagemap.pb-c.h" @@ -172,6 +175,27 @@ static int generate_iovs(struct pstree_item *item, struct vma_area *vma, struct unsigned long pages[3] = {}; int ret = 0; + static char *ZERO_PAGE = NULL; + static char *REMOTE_PAGE = NULL; + int zero = 0; + struct iovec local[2]; + struct iovec remote[1]; + int nread = 0; + if (opts.zero_pages && ZERO_PAGE == NULL) { + ZERO_PAGE = xmalloc(PAGE_SIZE); + REMOTE_PAGE = xmalloc(PAGE_SIZE); + if (ZERO_PAGE == NULL || REMOTE_PAGE == NULL) { + pr_warn("Can't allocate memory - disabling --zero-pages\n"); + opts.zero_pages = 0; + } else { + memzero(ZERO_PAGE, PAGE_SIZE); + local[0].iov_base = REMOTE_PAGE; + local[0].iov_len = PAGE_SIZE; + remote[0].iov_base = (void *) 0x0; + remote[0].iov_len = PAGE_SIZE; + } + } + nr_to_scan = (vma_area_len(vma) - *off) / PAGE_SIZE; for (pfn = 0; pfn < nr_to_scan; pfn++) { @@ -179,10 +203,30 @@ static int generate_iovs(struct pstree_item *item, struct vma_area *vma, struct unsigned int ppb_flags = 0; int st; - if (!should_dump_page(vma->e, at[pfn])) - continue; - vaddr = vma->e->start + 
*off + pfn * PAGE_SIZE; + /* + * If should_dump_page() returns true, it means the page is in the dumpees resident memory + * (i.e. bit 63 of the page frame number 'at[pfn]' is set) but it is not the zero-page. + */ + if (should_dump_page(vma->e, at[pfn])) { + if (opts.zero_pages) { + remote[0].iov_base = (void*)vaddr; + nread = process_vm_readv(item->pid->real, local, 1, remote, 1, 0); + if (nread == PAGE_SIZE) { + zero = memcmp(ZERO_PAGE, REMOTE_PAGE, PAGE_SIZE); + /* + * If the page contains just zeros we can treat it like the zero page and skip it. + * At restore it will be replaced by a reference to the zero page and COWed if accessed. + */ + if (zero == 0) { + pr_info("Zero page detected at virtual addr = %p\n", (void*)vaddr); + continue; + } + } + } + } else { + continue; + } if (vma_entry_can_be_lazy(vma->e) && !is_stack(item, vaddr)) ppb_flags |= PPB_LAZY; diff --git a/test/javaTests/pom.xml b/test/javaTests/pom.xml index ddb6c89cf1..8a0b5bd4b3 100644 --- a/test/javaTests/pom.xml +++ b/test/javaTests/pom.xml @@ -18,6 +18,7 @@ test.xml + test-zero.xml diff --git a/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java b/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java index 860619c267..6c22c7260b 100644 --- a/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java +++ b/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java @@ -112,7 +112,7 @@ public void runtest(String testName, String checkpointOpt, String restoreOpt) th String pid; int exitCode; - System.out.println("======= Testing " + testName + " ========"); + System.out.println("======= Testing " + testName + " " + checkpointOpt + " ========"); testSetup(testName); diff --git a/test/javaTests/test-zero.xml b/test/javaTests/test-zero.xml new file mode 100644 index 0000000000..e8d1b7557e --- /dev/null +++ b/test/javaTests/test-zero.xml @@ -0,0 +1,89 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +