/
container_checkpoint_linux.go
306 lines (267 loc) · 9.06 KB
/
container_checkpoint_linux.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
//go:build linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"archive/tar"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"time"
crmetadata "github.com/checkpoint-restore/checkpointctl/lib"
"github.com/checkpoint-restore/go-criu/v7"
"github.com/containerd/containerd/v2/core/content"
"github.com/containerd/containerd/v2/core/images"
"github.com/containerd/containerd/v2/core/runtime/v2/runc/options"
"github.com/containerd/containerd/v2/pkg/archive"
"github.com/containerd/containerd/v2/plugins"
"github.com/containerd/containerd/v2/protobuf/proto"
ptypes "github.com/containerd/containerd/v2/protobuf/types"
"github.com/containerd/log"
v1 "github.com/opencontainers/image-spec/specs-go/v1"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
"github.com/containerd/containerd/v2/client"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// podCriuVersion is the minimum CRIU version needed for
// checkpointing and restoring containers out of and into Pods.
// It is compared against the integer returned by go-criu's GetCriuVersion
// (presumably 31600 encodes CRIU 3.16.0 — confirm against go-criu).
const podCriuVersion = 31600
// checkForCriu queries the CRIU version through the go-criu bindings and
// returns nil when that version is at least the requested minimum.
//
// An error is returned when the version cannot be determined or when the
// reported version is lower than version.
func checkForCriu(version int) error {
	installed, err := criu.MakeCriu().GetCriuVersion()
	switch {
	case err != nil:
		return fmt.Errorf("failed to check for criu version: %w", err)
	case installed < version:
		return fmt.Errorf("checkpoint/restore requires at least CRIU %d, current version is %d", version, installed)
	default:
		return nil
	}
}
// CheckpointContainer checkpoints the running container named in r and writes
// the result as a tar archive to r.Location.
//
// The steps are:
//  1. verify that CRIU is present and recent enough (podCriuVersion),
//  2. verify that the container is in the RUNNING state,
//  3. checkpoint the container's task, which yields an image whose target is an
//     OCI index,
//  4. copy the CRIU data, the rw layer diff and the container spec referenced by
//     that index, together with a generated container config, into a temporary
//     "ctrd-checkpoint" directory below the container root directory,
//  5. tar that directory into r.Location.
//
// The temporary directory is removed before returning. A missing or too old
// CRIU is reported as codes.Unimplemented; every other failure is returned as
// a regular error.
func (c *criService) CheckpointContainer(ctx context.Context, r *runtime.CheckpointContainerRequest) (*runtime.CheckpointContainerResponse, error) {
	start := time.Now()
	if err := checkForCriu(podCriuVersion); err != nil {
		// This is the wrong error message and needs to be adapted once
		// Kubernetes (the e2e_node/checkpoint) test has been changed to
		// handle too old or missing CRIU error messages.
		log.G(ctx).WithError(err).Errorf("Failed to checkpoint container %q", r.GetContainerId())
		return nil, status.Errorf(codes.Unimplemented, "method CheckpointContainer not implemented")
	}

	container, err := c.containerStore.Get(r.GetContainerId())
	if err != nil {
		return nil, fmt.Errorf("an error occurred when try to find container %q: %w", r.GetContainerId(), err)
	}

	state := container.Status.Get().State()
	if state != runtime.ContainerState_CONTAINER_RUNNING {
		return nil, fmt.Errorf(
			"container %q is in %s state. only %s containers can be checkpointed",
			r.GetContainerId(),
			criContainerStateToString(state),
			criContainerStateToString(runtime.ContainerState_CONTAINER_RUNNING),
		)
	}

	imageRef := container.ImageRef
	image, err := c.GetImage(imageRef)
	if err != nil {
		return nil, fmt.Errorf("getting container image failed: %w", err)
	}

	i, err := container.Container.Info(ctx)
	if err != nil {
		return nil, fmt.Errorf("get container info: %w", err)
	}

	// The first known reference of the image, if any, is recorded as its name.
	rootfsImageName := ""
	if len(image.References) > 0 {
		rootfsImageName = image.References[0]
	}

	configJSON, err := json.Marshal(&crmetadata.ContainerConfig{
		ID:              container.ID,
		Name:            container.Name,
		RootfsImageName: rootfsImageName,
		RootfsImageRef:  imageRef,
		OCIRuntime:      i.Runtime.Name,
		RootfsImage:     container.Config.GetImage().UserSpecifiedImage,
		CheckpointedAt:  time.Now(),
		CreatedTime:     i.CreatedAt,
	})
	if err != nil {
		return nil, fmt.Errorf("generating container config JSON failed: %w", err)
	}

	task, err := container.Container.Task(ctx, nil)
	if err != nil {
		return nil, fmt.Errorf("failed to get task for container %q: %w", r.GetContainerId(), err)
	}

	img, err := task.Checkpoint(ctx, withCheckpointOpts(i.Runtime.Name, c.getContainerRootDir(r.GetContainerId())))
	if err != nil {
		return nil, fmt.Errorf("checkpointing container %q failed: %w", r.GetContainerId(), err)
	}

	// the checkpoint image has been provided as an index with manifests representing the tar of criu data, the rw layer, and the config
	var (
		index        v1.Index
		rawIndex     []byte
		targetDesc   = img.Target()
		contentStore = img.ContentStore()
	)

	rawIndex, err = content.ReadBlob(ctx, contentStore, targetDesc)
	if err != nil {
		return nil, fmt.Errorf("failed to retrieve checkpoint index blob from content store: %w", err)
	}
	if err = json.Unmarshal(rawIndex, &index); err != nil {
		return nil, fmt.Errorf("failed to unmarshall blob into checkpoint data OCI index: %w", err)
	}

	cpPath := filepath.Join(c.getContainerRootDir(r.GetContainerId()), "ctrd-checkpoint")
	if err := os.MkdirAll(cpPath, 0o700); err != nil {
		return nil, fmt.Errorf("failed to create checkpoint directory %q: %w", cpPath, err)
	}
	// The directory is only a staging area for the final archive.
	defer os.RemoveAll(cpPath)

	if err := os.WriteFile(filepath.Join(cpPath, crmetadata.ConfigDumpFile), configJSON, 0o600); err != nil {
		return nil, fmt.Errorf("failed to write container config to checkpoint dir: %w", err)
	}

	// walk the manifests and pull out the blobs that we need to save in the checkpoint tarball:
	// - the checkpoint criu data
	// - the rw diff tarball
	// - the spec blob
	for _, manifest := range index.Manifests {
		switch manifest.MediaType {
		case images.MediaTypeContainerd1Checkpoint:
			if err := writeCriuCheckpointData(ctx, contentStore, manifest, cpPath); err != nil {
				return nil, fmt.Errorf("failed to copy CRIU checkpoint blob to checkpoint dir: %w", err)
			}
		case v1.MediaTypeImageLayerGzip:
			if err := writeRootFsDiffTar(ctx, contentStore, manifest, cpPath); err != nil {
				return nil, fmt.Errorf("failed to copy rw filesystem layer blob to checkpoint dir: %w", err)
			}
		case images.MediaTypeContainerd1CheckpointConfig:
			if err := writeSpecDumpFile(ctx, contentStore, manifest, cpPath); err != nil {
				return nil, fmt.Errorf("failed to copy container spec blob to checkpoint dir: %w", err)
			}
		default:
			// Any other manifest is not copied into the checkpoint directory.
		}
	}

	// write final tarball of all content
	//
	// The destination is opened before the diff stream is started so that a
	// failure to open it does not leave a diff stream behind. O_TRUNC makes
	// sure no stale bytes of a previous, longer file at the same location
	// survive behind the new archive.
	outFile, err := os.OpenFile(r.Location, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0o600)
	if err != nil {
		return nil, fmt.Errorf("failed to open checkpoint archive %q: %w", r.Location, err)
	}
	// Safety net for the error paths; the success path closes explicitly below.
	defer outFile.Close()

	diffStream := archive.Diff(ctx, "", cpPath)
	// Closing the stream on every path releases its producer even when the
	// copy below fails half way through.
	defer diffStream.Close()

	if _, err := io.Copy(outFile, diffStream); err != nil {
		return nil, fmt.Errorf("failed to write checkpoint archive %q: %w", r.Location, err)
	}
	if err := diffStream.Close(); err != nil {
		return nil, err
	}
	// A failing Close can mean that not all data reached the file.
	if err := outFile.Close(); err != nil {
		return nil, fmt.Errorf("failed to close checkpoint archive %q: %w", r.Location, err)
	}

	containerCheckpointTimer.WithValues(i.Runtime.Name).UpdateSince(start)

	return &runtime.CheckpointContainerResponse{}, nil
}
// withCheckpointOpts returns a checkpoint task option for the runtime rt.
//
// For the runc v2 runtime it makes sure the task info carries runc
// CheckpointOptions, configures them so that the container keeps running after
// the checkpoint, and sets rootDir as the work path. For any other runtime the
// task info is left untouched.
//
// The returned option fails if the task info already carries options of a
// type other than *options.CheckpointOptions.
func withCheckpointOpts(rt, rootDir string) client.CheckpointTaskOpts {
	return func(r *client.CheckpointTaskInfo) error {
		// Kubernetes currently supports checkpointing of container
		// as part of the Forensic Container Checkpointing KEP.
		// This implies that the container is never stopped
		leaveRunning := true

		switch rt {
		case plugins.RuntimeRuncV2:
			if r.Options == nil {
				r.Options = &options.CheckpointOptions{}
			}
			opts, ok := r.Options.(*options.CheckpointOptions)
			if !ok {
				// Options of a foreign type cannot be configured here;
				// report it instead of dereferencing a nil pointer.
				return fmt.Errorf("unexpected checkpoint options type %T for runtime %q", r.Options, rt)
			}
			if opts == nil {
				// A typed nil pointer passes the assertion but cannot be written to.
				opts = &options.CheckpointOptions{}
				r.Options = opts
			}
			opts.Exit = !leaveRunning
			opts.WorkPath = rootDir
		}
		return nil
	}
}
// writeCriuCheckpointData reads the blob described by desc from store, treats
// it as a tar archive with the CRIU data and unpacks every entry into the
// crmetadata.CheckpointDirectory directory below cpPath.
//
// Entries whose name contains ".." are rejected so that nothing can be written
// outside of that directory. Each destination file is closed as soon as it has
// been written, and a failing Close is reported to the caller.
func writeCriuCheckpointData(ctx context.Context, store content.Store, desc v1.Descriptor, cpPath string) error {
	ra, err := store.ReaderAt(ctx, desc)
	if err != nil {
		return err
	}
	defer ra.Close()

	checkpointDirectory := filepath.Join(cpPath, crmetadata.CheckpointDirectory)
	// This is the criu data tarball. Let's unpack it
	// and put it into the crmetadata.CheckpointDirectory directory.
	if err := os.MkdirAll(checkpointDirectory, 0o700); err != nil {
		return err
	}

	tr := tar.NewReader(content.NewReader(ra))

	// extractEntry copies the entry the tar reader currently points at into
	// its own file. Doing this in a separate function closes every file right
	// after it has been written instead of keeping all of them open until the
	// whole archive has been processed.
	extractEntry := func(header *tar.Header) error {
		destFile, err := os.Create(filepath.Join(checkpointDirectory, header.Name))
		if err != nil {
			return err
		}
		if _, err := io.CopyN(destFile, tr, header.Size); err != nil {
			destFile.Close()
			return err
		}
		return destFile.Close()
	}

	for {
		header, err := tr.Next()
		if errors.Is(err, io.EOF) {
			// End of the archive: everything has been unpacked.
			return nil
		}
		if err != nil {
			return err
		}
		if strings.Contains(header.Name, "..") {
			return errors.New("found illegal string '..' in checkpoint archive")
		}
		if err := extractEntry(header); err != nil {
			return err
		}
	}
}
// writeRootFsDiffTar copies the blob described by desc from store, unchanged,
// into the file crmetadata.RootFsDiffTar below cpPath. The blob is the tarball
// of the container's rw layer.
//
// Errors from opening the blob, creating the file, copying the data and
// closing the written file are returned to the caller.
func writeRootFsDiffTar(ctx context.Context, store content.Store, desc v1.Descriptor, cpPath string) error {
	ra, err := store.ReaderAt(ctx, desc)
	if err != nil {
		return err
	}
	defer ra.Close()

	// the rw layer tarball
	f, err := os.Create(filepath.Join(cpPath, crmetadata.RootFsDiffTar))
	if err != nil {
		return err
	}

	if _, err := io.Copy(f, content.NewReader(ra)); err != nil {
		f.Close()
		return err
	}
	// A failing Close can mean that not all data reached the file.
	return f.Close()
}
// writeSpecDumpFile reads the blob described by desc from store, decodes it as
// a protobuf Any and writes the Any's raw value, which is the container spec,
// to the file crmetadata.SpecDumpFile below cpPath.
//
// The blob is read and decoded before the file is touched, so a failure in
// either step does not leave an empty spec file behind.
func writeSpecDumpFile(ctx context.Context, store content.Store, desc v1.Descriptor, cpPath string) error {
	data, err := content.ReadBlob(ctx, store, desc)
	if err != nil {
		return err
	}

	var spec ptypes.Any
	if err := proto.Unmarshal(data, &spec); err != nil {
		return err
	}

	// this is the container spec
	// 0o666 (before umask) is the mode os.Create uses, so the file is created
	// with the same permissions as before.
	return os.WriteFile(filepath.Join(cpPath, crmetadata.SpecDumpFile), spec.Value, 0o666)
}