#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>
#include <inttypes.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/vfs.h>
#include <sys/mman.h>
#include <errno.h>

#include "log.h"
#include "bug.h"
#include "kerndat.h"
#include "fs-magic.h"
#include "mem.h"
#include "compiler.h"
#include "sysctl.h"
#include "asm/types.h"
#include "cr_options.h"
#include "util.h"

dev_t kerndat_shmem_dev;

/*
 * Anonymous shared mappings are backed by a hidden tmpfs
 * mount. Find out its device number to distinguish such
 * mappings from maps of real tmpfs files.
 */
static int kerndat_get_shmemdev(void)
{
	void *map;
	char maps[128];
	struct stat buf;

	map = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
		   MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	if (map == MAP_FAILED) {
		pr_perror("Can't mmap memory for shmemdev test");
		return -1;
	}

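	/*
	 * Each /proc/self/map_files/ entry is a symlink to the file
	 * backing the mapping, so stat()-ing it reveals the device
	 * of the hidden tmpfs instance behind this anonymous map.
	 */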
	snprintf(maps, sizeof(maps), "/proc/self/map_files/%lx-%lx",
		 (unsigned long)map, (unsigned long)map + PAGE_SIZE);
	if (stat(maps, &buf) < 0) {
		munmap(map, PAGE_SIZE);
		pr_perror("Can't stat self map_files");
		return -1;
	}
	munmap(map, PAGE_SIZE);

	kerndat_shmem_dev = buf.st_dev;
	pr_info("Found anon-shmem device at %"PRIx64"\n", (u64)kerndat_shmem_dev);
	return 0;
}
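
/*
 * Stat the host's /dev/pts mount once and cache the result, so that
 * callers can compare a pty's device against the root devpts instance.
 */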
struct stat *kerndat_get_devpts_stat(void)
{
	static struct stat st = {};
	struct statfs fst;

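	/* Return the cached result from a previous call, if any */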
	if (st.st_dev != 0)
		return &st;

	if (statfs("/dev/pts", &fst)) {
		pr_perror("Unable to statfs /dev/pts");
		return NULL;
	}

	if (fst.f_type != DEVPTS_SUPER_MAGIC) {
		pr_err("devpts isn't mounted on the host\n");
		return NULL;
	}

	/* The root /dev/pts is mounted w/o newinstance, isn't it? */
	if (stat("/dev/pts", &st)) {
		pr_perror("Unable to stat /dev/pts");
		return NULL;
	}

	return &st;
}

/*
 * Check whether pagemap reports the soft-dirty bit. The kernel
 * provides this functionality under the CONFIG_MEM_SOFT_DIRTY option.
 */
bool kerndat_has_dirty_track = false;

int kerndat_get_dirty_track(void)
{
	char *map;
	int pm2;
	u64 pmap = 0;
	int ret = -1;

	map = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (map == MAP_FAILED) {
		pr_perror("Can't mmap memory for pagemap test");
		return ret;
	}

	/*
	 * The kernel shows soft-dirty bits only if they have been
	 * reset at least once (this requirement is to be removed
	 * in a couple of kernel releases).
	 */
	do_task_reset_dirty_track(getpid());

	pm2 = open("/proc/self/pagemap", O_RDONLY);
	if (pm2 < 0) {
		pr_perror("Can't open pagemap file");
		munmap(map, PAGE_SIZE);
		return ret;
	}

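	/* Write to the page so its soft-dirty bit gets set */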
	map[0] = '\0';
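	/* pagemap holds one u64 entry per virtual page of the process */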
	lseek(pm2, (unsigned long)map / PAGE_SIZE * sizeof(u64), SEEK_SET);
	ret = read(pm2, &pmap, sizeof(pmap));
	close(pm2);
	munmap(map, PAGE_SIZE);
	if (ret != sizeof(pmap)) {
		pr_perror("Can't read pagemap");
		return -1;
	}

	if (pmap & PME_SOFT_DIRTY) {
		pr_info("Dirty tracking is supported on this kernel\n");
		kerndat_has_dirty_track = true;
	} else {
		pr_info("Dirty tracking support is OFF\n");
		if (opts.track_mem) {
			pr_err("Memory tracking is not available\n");
			return -1;
		}
	}

	return 0;
}

/*
 * Strictly speaking, on a machine with a huge amount of
 * memory we're allowed to send up to 4M and read up to
 * 6M of TCP data at once. But we will figure out the
 * precise size of the limits a bit later, when restore
 * starts.
 *
 * Meanwhile set them to 2M and 3M, which is safe enough
 * to proceed without errors.
 */
int tcp_max_wshare = 2U << 20;
int tcp_max_rshare = 3U << 20;

static int tcp_read_sysctl_limits(void)
{
	u32 vect[2][3] = { };
	int ret;

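	/* The tcp_wmem and tcp_rmem sysctls are min/default/max triples */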
	struct sysctl_req req[] = {
		{ "net/ipv4/tcp_wmem", &vect[0], CTL_U32A(ARRAY_SIZE(vect[0])) },
		{ "net/ipv4/tcp_rmem", &vect[1], CTL_U32A(ARRAY_SIZE(vect[1])) },
		{ },
	};

	/*
	 * Let's figure out exactly how much memory is
	 * available for the send/receive queues on restore.
	 */
	ret = sysctl_op(req, CTL_READ);
	if (ret) {
		pr_warn("TCP mem sysctls are not available. Using defaults.\n");
		goto out;
	}

	tcp_max_wshare = min(tcp_max_wshare, (int)vect[0][2]);
	tcp_max_rshare = min(tcp_max_rshare, (int)vect[1][2]);

	if (tcp_max_wshare < 128 || tcp_max_rshare < 128)
		pr_warn("The memory limits for TCP queues are suspiciously small\n");
out:
	pr_debug("TCP queue memory limits are %d:%d\n", tcp_max_wshare, tcp_max_rshare);
	return 0;
}

/* The page frame number (PFN) is constant for the zero page */
u64 zero_page_pfn;

static int init_zero_page_pfn(void)
{
	void *addr;
	int ret;

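	/*
	 * A read-only anonymous page that has never been written is
	 * backed by the kernel's shared zero page, so looking up its
	 * PFN yields the zero page's PFN.
	 */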
	addr = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (addr == MAP_FAILED) {
		pr_perror("Unable to map zero page");
		return -1;
	}

	if (*((int *) addr) != 0) {
		BUG();
		return -1;
	}

	ret = vaddr_to_pfn((unsigned long)addr, &zero_page_pfn);
	munmap(addr, PAGE_SIZE);

	if (zero_page_pfn == 0)
		ret = -1;

	return ret;
}
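
/*
 * Run the one-off kernel probes needed at dump time: the anon-shmem
 * device, soft-dirty tracking support and the zero page PFN.
 */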
int kerndat_init(void)
{
	int ret;

	ret = kerndat_get_shmemdev();
	if (!ret)
		ret = kerndat_get_dirty_track();
	if (!ret)
		ret = init_zero_page_pfn();

	return ret;
}
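
/* The number of the highest capability supported by the running kernel */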
int kern_last_cap;

int get_last_cap(void)
{
	struct sysctl_req req[] = {
		{ "kernel/cap_last_cap", &kern_last_cap, CTL_U32 },
		{ },
	};

	return sysctl_op(req, CTL_READ);
}

int kerndat_init_rst(void)
{
	int ret;

	/*
	 * Read TCP sysctls before anything else, since the limits
	 * we're interested in are not available inside namespaces.
	 */
	ret = tcp_read_sysctl_limits();
	if (!ret)
		ret = get_last_cap();

	return ret;
}