/
cri.go
1509 lines (1261 loc) · 51.1 KB
/
cri.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
package v1alpha2
import (
"bytes"
"context"
"encoding/json"
"fmt"
"os"
"path"
"path/filepath"
"reflect"
goruntime "runtime"
"time"
"github.com/alibaba/pouch/apis/filters"
apitypes "github.com/alibaba/pouch/apis/types"
anno "github.com/alibaba/pouch/cri/annotations"
runtime "github.com/alibaba/pouch/cri/apis/v1alpha2"
"github.com/alibaba/pouch/cri/metrics"
cni "github.com/alibaba/pouch/cri/ocicni"
"github.com/alibaba/pouch/cri/stream"
metatypes "github.com/alibaba/pouch/cri/v1alpha2/types"
"github.com/alibaba/pouch/ctrd"
"github.com/alibaba/pouch/daemon/config"
"github.com/alibaba/pouch/daemon/mgr"
"github.com/alibaba/pouch/hookplugins"
"github.com/alibaba/pouch/pkg/errtypes"
"github.com/alibaba/pouch/pkg/log"
"github.com/alibaba/pouch/pkg/meta"
"github.com/alibaba/pouch/pkg/reference"
pkgstreams "github.com/alibaba/pouch/pkg/streams"
"github.com/alibaba/pouch/pkg/utils"
util_metrics "github.com/alibaba/pouch/pkg/utils/metrics"
"github.com/alibaba/pouch/version"
"github.com/pkg/errors"
)
// Identity strings reported by the Version call.
const (
	// pouchRuntimeName is returned as the runtime name.
	pouchRuntimeName = "pouch"
	// kubeletRuntimeAPIVersion is returned as the version of the kubelet runtime API.
	kubeletRuntimeAPIVersion = "0.1.0"
)

// Labels and naming conventions applied to pouch containers created through CRI.
const (
	// kubePrefix identifies the containers/sandboxes on the node that are managed by kubelet.
	kubePrefix = "k8s"

	// annotationPrefix distinguishes annotations from labels.
	annotationPrefix = "annotation."

	// containerTypeLabelKey is the internal pouch label telling whether a
	// container is a sandbox or a regular container; the two values follow.
	containerTypeLabelKey       = "io.kubernetes.pouch.type"
	containerTypeLabelSandbox   = "sandbox"
	containerTypeLabelContainer = "container"

	// sandboxIDLabelKey is the label holding the ID of the sandbox a container belongs to.
	sandboxIDLabelKey = "io.kubernetes.sandbox.id"
	// containerLogPathLabelKey is the label holding the log path of a container.
	containerLogPathLabelKey = "io.kubernetes.container.logpath"

	// sandboxContainerName is included in the pouch container name so that
	// users can easily identify the sandboxes.
	sandboxContainerName = "POD"

	// nameDelimiter joins the parts of a pouch container name.
	nameDelimiter = "_"
)

// Miscellaneous fixed values.
const (
	// namespaceModeHost is presumably the namespace mode value that selects the
	// host namespace; its users are outside this part of the file.
	namespaceModeHost = "host"

	// resolvConfPath is the absolute path of resolv.conf on host or container.
	resolvConfPath = "/etc/resolv.conf"

	// snapshotPlugin implements a snapshotter.
	snapshotPlugin = "io.containerd.snapshotter.v1"

	// networkNotReadyReason is the reason reported when network is not ready.
	networkNotReadyReason = "NetworkPluginNotReady"

	// maxMsgSize is the max size syncExec could output (64 MiB).
	maxMsgSize = 64 << 20
)
// defaultStopTimeout is the timeout passed to ContainerMgr.Stop when a sandbox
// or its containers are stopped without a caller-supplied timeout.
// NOTE(review): the unit is defined by ContainerMgr.Stop (presumably seconds) — confirm.
var defaultStopTimeout int64 = 10
// CriMgr is the interface that defines all operations against CRI: the
// runtime, image and volume gRPC services plus control of the streaming server.
type CriMgr interface {
// RuntimeServiceServer is the interface of the CRI runtime service.
runtime.RuntimeServiceServer
// ImageServiceServer is the interface of the CRI image service.
runtime.ImageServiceServer
// VolumeServiceServer is the interface of the CRI volume service.
runtime.VolumeServiceServer
// StreamServerStart starts the stream server of CRI.
StreamServerStart() error
// StreamRouter returns the router of the stream server.
StreamRouter() stream.Router
}
// CriManager is an implementation of interface CriMgr.
type CriManager struct {
// ContainerMgr performs the container operations (create, start, stop,
// remove, get, list) behind sandboxes and regular containers.
ContainerMgr mgr.ContainerMgr
// ImageMgr is the image manager handed to NewCriManager.
ImageMgr mgr.ImageMgr
// VolumeMgr is the volume manager handed to NewCriManager.
VolumeMgr mgr.VolumeMgr
// CniMgr manages network namespaces and pod networking for sandboxes.
CniMgr cni.CniMgr
// CriPlugin is an optional hook; when non-nil its PreCreateContainer is
// called before a container is created.
CriPlugin hookplugins.CriPlugin
// StreamServer is the stream server of CRI serves container streaming request.
StreamServer StreamServer
// SandboxBaseDir is the directory used to store sandbox files like /etc/hosts, /etc/resolv.conf, etc.
SandboxBaseDir string
// SandboxImage is the image used by sandbox container.
SandboxImage string
// SandboxStore stores the configuration of sandboxes.
SandboxStore *meta.Store
// SnapshotStore stores information of all snapshots.
SnapshotStore *mgr.SnapshotStore
// imageFSPath is the path to image filesystem.
imageFSPath string
// DaemonConfig is the config of daemon
DaemonConfig *config.Config
}
// NewCriManager creates a brand new cri manager.
//
// It wires the given container, image and volume managers and the optional
// CRI plugin into a CriManager, and creates the pieces the manager owns: the
// stream server, the CNI manager, the on-disk sandbox metadata store (under
// <HomeDir>/sandboxes-meta) and the snapshot store. When periodic CRI stats
// collection is enabled in the configuration, a snapshots syncer is started
// with the configured period, which must be greater than zero.
//
// An error is returned as soon as any of these components cannot be created
// or when the stats collection period is invalid.
func NewCriManager(config *config.Config, ctrMgr mgr.ContainerMgr, imgMgr mgr.ImageMgr, volumeMgr mgr.VolumeMgr, criPlugin hookplugins.CriPlugin) (CriMgr, error) {
	streamCfg, err := toStreamConfig(config)
	if err != nil {
		return nil, err
	}

	streamServer, err := NewStreamServer(streamCfg, stream.NewStreamRuntime(ctrMgr))
	if err != nil {
		return nil, fmt.Errorf("failed to create stream server for cri manager: %v", err)
	}

	criMgr := &CriManager{
		ContainerMgr:   ctrMgr,
		ImageMgr:       imgMgr,
		VolumeMgr:      volumeMgr,
		CriPlugin:      criPlugin,
		StreamServer:   streamServer,
		SandboxBaseDir: path.Join(config.HomeDir, "sandboxes"),
		SandboxImage:   config.CriConfig.SandboxImage,
		SnapshotStore:  mgr.NewSnapshotStore(),
		DaemonConfig:   config,
	}

	cniMgr, err := cni.NewCniManager(&config.CriConfig)
	if err != nil {
		return nil, fmt.Errorf("failed to create cni manager: %v", err)
	}
	criMgr.CniMgr = cniMgr

	// Sandbox metadata is persisted locally, one SandboxMeta per sandbox.
	storeCfg := meta.Config{
		Driver:  "local",
		BaseDir: path.Join(config.HomeDir, "sandboxes-meta"),
		Buckets: []meta.Bucket{
			{
				Name: meta.MetaJSONFile,
				Type: reflect.TypeOf(metatypes.SandboxMeta{}),
			},
		},
	}
	sandboxStore, err := meta.NewStore(storeCfg)
	if err != nil {
		return nil, fmt.Errorf("failed to create sandbox meta store: %v", err)
	}
	criMgr.SandboxStore = sandboxStore

	containerdRoot := path.Join(config.HomeDir, "containerd/root")
	snapshotterName := ctrd.CurrentSnapshotterName(context.TODO())
	criMgr.imageFSPath = imageFSPath(containerdRoot, snapshotterName)
	log.With(nil).Infof("Get image filesystem path %q", criMgr.imageFSPath)

	// Without periodic stats collection there is nothing more to set up.
	if !config.CriConfig.EnableCriStatsCollect {
		log.With(nil).Infof("disable cri to collect stats from containerd periodically")
		return criMgr, nil
	}

	period := config.CriConfig.CriStatsCollectPeriod
	if period <= 0 {
		return nil, fmt.Errorf("cri stats collect period should > 0")
	}

	syncer := ctrMgr.NewSnapshotsSyncer(criMgr.SnapshotStore, time.Duration(period)*time.Second)
	syncer.Start()

	return criMgr, nil
}
// StreamServerStart starts the stream server of CRI by delegating to the
// manager's StreamServer and returns whatever error its Start reports.
func (c *CriManager) StreamServerStart() error {
return c.StreamServer.Start()
}
// StreamRouter returns the router of the stream server; the manager's
// StreamServer itself is returned as the stream.Router.
func (c *CriManager) StreamRouter() stream.Router {
return c.StreamServer
}
// TODO: Move the underlying functions to their respective files in the future.
// Version returns the runtime name, runtime version and runtime API version.
// The request is not inspected and the call never fails.
func (c *CriManager) Version(ctx context.Context, r *runtime.VersionRequest) (*runtime.VersionResponse, error) {
	resp := &runtime.VersionResponse{
		RuntimeName:       pouchRuntimeName,
		RuntimeVersion:    version.Version,
		RuntimeApiVersion: version.APIVersion,
		Version:           kubeletRuntimeAPIVersion,
	}
	return resp, nil
}
// RunPodSandbox creates and starts a pod-level sandbox. Runtimes should ensure
// the sandbox is in ready state.
//
// The sandbox is built in four steps: make sure the sandbox image exists and
// reserve a sandbox ID in the SandboxStore, set up networking (skipped for
// host network), create the sandbox container, and start it. Afterwards the
// sandbox root directory and its files (e.g. resolv.conf) are prepared.
//
// Every step registers a deferred rollback that fires only when the named
// result retErr is non-nil, so a failure at any point undoes the work done so
// far in reverse order. The action counter and timer metrics are always
// recorded; the success counter only on success.
//
// It returns the ID of the new sandbox, or an error if the request carries no
// metadata or any step fails.
func (c *CriManager) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandboxRequest) (_ *runtime.RunPodSandboxResponse, retErr error) {
	label := util_metrics.ActionRunLabel
	defer func(start time.Time) {
		metrics.PodActionsCounter.WithLabelValues(label).Inc()
		metrics.PodActionsTimer.WithLabelValues(label).Observe(time.Since(start).Seconds())
	}(time.Now())

	config := r.GetConfig()
	if config.GetMetadata() == nil {
		return nil, fmt.Errorf("sandbox metadata required")
	}

	// Step 1: Prepare image for the sandbox.
	image := c.SandboxImage

	// Make sure the sandbox image exists.
	err := c.ensureSandboxImageExists(ctx, image)
	if err != nil {
		return nil, err
	}

	// prepare the sandboxID and store it.
	id, err := c.generateSandboxID(ctx)
	if err != nil {
		return nil, err
	}
	sandboxMeta := &metatypes.SandboxMeta{
		ID: id,
	}
	if err := c.SandboxStore.Put(sandboxMeta); err != nil {
		return nil, err
	}

	// If running sandbox failed, clean up the sandboxMeta from sandboxStore.
	// The metadata is only cleaned up when the container has been removed
	// successfully by Pouchd; otherwise it is kept so the leftover container
	// stays tracked.
	removeContainerErr := false
	defer func() {
		if retErr != nil && !removeContainerErr {
			if err := c.SandboxStore.Remove(id); err != nil {
				log.With(ctx).Errorf("failed to remove the metadata of container %q from sandboxStore: %v", id, err)
			}
		}
	}()

	// Step 2: Setup networking for the sandbox.

	// If it is in host network, no need to configure the network of sandbox.
	if sandboxNetworkMode(config) != runtime.NamespaceMode_NODE {
		sandboxMeta.NetNS, err = c.CniMgr.NewNetNS()
		if err != nil {
			return nil, err
		}
		defer func() {
			if retErr != nil {
				if err := c.CniMgr.RemoveNetNS(sandboxMeta.NetNS); err != nil {
					log.With(ctx).Errorf("failed to remove net ns for sandbox %q: %v", id, err)
				}
			}
		}()

		if err := c.setupPodNetwork(id, sandboxMeta.NetNS, config); err != nil {
			return nil, err
		}
		defer func() {
			if retErr != nil {
				if err := c.teardownNetwork(id, sandboxMeta.NetNS, config); err != nil {
					log.With(ctx).Errorf("failed to teardown pod network for sandbox %q: %v", id, err)
				}
			}
		}()
	}

	// Step 3: Create the sandbox container.

	// applies the runtime of container specified by the caller.
	if err := c.applySandboxRuntimeHandler(sandboxMeta, r.GetRuntimeHandler(), config.GetAnnotations()); err != nil {
		return nil, err
	}

	// applies the annotations extended.
	if err := c.applySandboxAnnotations(sandboxMeta, config.GetAnnotations()); err != nil {
		return nil, err
	}

	createConfig, err := makeSandboxPouchConfig(config, sandboxMeta, image)
	if err != nil {
		return nil, fmt.Errorf("failed to make sandbox pouch config for pod %q: %v", config.GetMetadata().GetName(), err)
	}
	createConfig.SpecificID = id

	sandboxName := makeSandboxName(config)

	_, err = c.ContainerMgr.Create(ctx, sandboxName, createConfig)
	if err != nil {
		return nil, fmt.Errorf("failed to create a sandbox for pod %q: %v", config.Metadata.Name, err)
	}

	// If running sandbox failed, clean up the container. This is registered
	// right after the container exists so that every later failure, including
	// a failure to persist the sandbox metadata just below, removes it.
	defer func() {
		if retErr != nil {
			if err := c.ContainerMgr.Remove(ctx, id, &apitypes.ContainerRemoveOptions{Volumes: true, Force: true}); err != nil {
				// Tell the metadata cleanup above to keep the entry.
				removeContainerErr = true
				log.With(ctx).Errorf("failed to remove container when running sandbox failed %q: %v", id, err)
			}
		}
	}()

	sandboxMeta.Config = config
	if err := c.SandboxStore.Put(sandboxMeta); err != nil {
		return nil, err
	}

	// Step 4: Start the sandbox container.
	err = c.ContainerMgr.Start(ctx, id, &apitypes.ContainerStartOptions{})
	if err != nil {
		return nil, fmt.Errorf("failed to start sandbox container for pod %q: %v", config.GetMetadata().GetName(), err)
	}

	sandboxRootDir := path.Join(c.SandboxBaseDir, id)
	err = os.MkdirAll(sandboxRootDir, 0755)
	if err != nil {
		return nil, fmt.Errorf("failed to create sandbox root directory: %v", err)
	}
	defer func() {
		// If running sandbox failed, clean up the sandbox directory.
		if retErr != nil {
			if err := os.RemoveAll(sandboxRootDir); err != nil {
				log.With(ctx).Errorf("failed to clean up the directory of sandbox %q: %v", id, err)
			}
		}
	}()

	// Setup sandbox file /etc/resolv.conf.
	err = setupSandboxFiles(sandboxRootDir, config)
	if err != nil {
		return nil, fmt.Errorf("failed to setup sandbox files: %v", err)
	}

	metrics.PodSuccessActionsCounter.WithLabelValues(label).Inc()

	return &runtime.RunPodSandboxResponse{PodSandboxId: id}, nil
}
// StartPodSandbox restarts a sandbox pod which was stopped by accident.
// The sandbox is reconfigured through the network plugin so that it
// reacquires its original network configuration, like the IP address.
//
// Rollback is driven by the named result retErr: each deferred function below
// only acts when the call is about to return an error, undoing the step it
// follows. Action counter/timer metrics are always recorded; the success
// counter only on success.
func (c *CriManager) StartPodSandbox(ctx context.Context, r *runtime.StartPodSandboxRequest) (_ *runtime.StartPodSandboxResponse, retErr error) {
label := util_metrics.ActionStartLabel
defer func(start time.Time) {
metrics.PodActionsCounter.WithLabelValues(label).Inc()
metrics.PodActionsTimer.WithLabelValues(label).Observe(time.Since(start).Seconds())
}(time.Now())
podSandboxID := r.GetPodSandboxId()
// Both the container and its stored sandbox metadata must exist.
sandbox, err := c.ContainerMgr.Get(ctx, podSandboxID)
if err != nil {
return nil, fmt.Errorf("failed to get container %q: %v", podSandboxID, err)
}
res, err := c.SandboxStore.Get(podSandboxID)
if err != nil {
return nil, fmt.Errorf("failed to get metadata of %q from SandboxStore: %v", podSandboxID, err)
}
sandboxMeta := res.(*metatypes.SandboxMeta)
// Sandbox whose network mode refers to a net namespace (sandboxMeta.NetNS).
if mgr.IsNetNS(sandbox.HostConfig.NetworkMode) {
// The error is deliberately discarded: only an empty ip is used as the
// signal that the network is down.
ip, _ := c.CniMgr.GetPodNetworkStatus(sandboxMeta.NetNS)
// recover network if it is down.
if ip == "" {
if err := c.CniMgr.RecoverNetNS(sandboxMeta.NetNS); err != nil {
return nil, fmt.Errorf("failed to recover netns %s for sandbox %q: %v", sandboxMeta.NetNS, podSandboxID, err)
}
// Roll back the recovered netns if a later step fails.
defer func() {
if retErr != nil {
if err := c.CniMgr.RemoveNetNS(sandboxMeta.NetNS); err != nil {
log.With(ctx).Errorf("failed to remove net ns for sandbox %q: %v", podSandboxID, err)
}
}
}()
if err = c.setupPodNetwork(podSandboxID, sandboxMeta.NetNS, sandboxMeta.Config); err != nil {
return nil, err
}
// Roll back the pod network if a later step fails.
defer func() {
if retErr != nil {
if err := c.teardownNetwork(podSandboxID, sandboxMeta.NetNS, sandboxMeta.Config); err != nil {
log.With(ctx).Errorf("failed to teardown pod network for sandbox %q: %v", podSandboxID, err)
}
}
}()
}
}
// start PodSandbox.
startErr := c.ContainerMgr.Start(ctx, podSandboxID, &apitypes.ContainerStartOptions{})
if startErr != nil {
return nil, fmt.Errorf("failed to start podSandbox %q: %v", podSandboxID, startErr)
}
// Stop the container again if anything after the start fails.
defer func() {
if retErr != nil {
stopErr := c.ContainerMgr.Stop(ctx, podSandboxID, defaultStopTimeout)
if stopErr != nil {
log.With(ctx).Errorf("failed to stop sandbox %q: %v", podSandboxID, stopErr)
}
}
}()
// legacy container using /proc/$pid/ns/net as the sandbox netns.
// Its network can only be set up once the container is running.
if mgr.IsNone(sandbox.HostConfig.NetworkMode) {
if err = c.setupPodNetwork(podSandboxID, containerNetns(sandbox), sandboxMeta.Config); err != nil {
return nil, err
}
}
// Setup sandbox file /etc/resolv.conf again to ensure resolv.conf is right
sandboxRootDir := path.Join(c.SandboxBaseDir, sandbox.ID)
err = setupSandboxFiles(sandboxRootDir, sandboxMeta.Config)
if err != nil {
return nil, fmt.Errorf("failed to setup sandbox files: %v", err)
}
metrics.PodSuccessActionsCounter.WithLabelValues(label).Inc()
return &runtime.StartPodSandboxResponse{}, nil
}
// StopPodSandbox stops the sandbox. If there are any running containers in the
// sandbox, they should be forcibly terminated.
//
// Notes:
//  1. For a legacy dockershim style container, the lifecycle of the pod
//     network is bound to the container through /proc/$pid/ns/net. When
//     stopping the sandbox, the pod network is torn down first and the sandbox
//     container is stopped afterwards.
//  2. In the new implementation an empty netns is created and the pod network
//     is set up inside it, independent from the container lifecycle. When
//     stopping the sandbox, the container is stopped first and the pod network
//     is torn down afterwards, which is the reverse of RunPodSandbox.
//
// Containers (including the sandbox container) that are already gone are
// logged and skipped. Any other failure aborts the call with an error. Action
// counter/timer metrics are always recorded; the success counter only on
// success.
func (c *CriManager) StopPodSandbox(ctx context.Context, r *runtime.StopPodSandboxRequest) (*runtime.StopPodSandboxResponse, error) {
	label := util_metrics.ActionStopLabel
	defer func(start time.Time) {
		metrics.PodActionsCounter.WithLabelValues(label).Inc()
		metrics.PodActionsTimer.WithLabelValues(label).Observe(time.Since(start).Seconds())
	}(time.Now())

	podSandboxID := r.GetPodSandboxId()
	res, err := c.SandboxStore.Get(podSandboxID)
	if err != nil {
		return nil, fmt.Errorf("failed to get metadata of %q from SandboxStore: %v", podSandboxID, err)
	}
	sandboxMeta := res.(*metatypes.SandboxMeta)

	// Select every container labelled as belonging to this sandbox.
	opts := &mgr.ContainerListOption{All: true}
	opts.FilterFunc = func(ctr *mgr.Container) bool {
		return ctr.Config.Labels[sandboxIDLabelKey] == podSandboxID
	}

	containers, err := c.ContainerMgr.List(ctx, opts)
	if err != nil {
		return nil, fmt.Errorf("failed to get the containers belong to sandbox %q: %v", podSandboxID, err)
	}

	// Stop all containers in the sandbox.
	for _, container := range containers {
		err = c.ContainerMgr.Stop(ctx, container.ID, defaultStopTimeout)
		if err != nil {
			if errtypes.IsNotfound(err) {
				log.With(ctx).Warningf("container %q of sandbox %q not found", container.ID, podSandboxID)
				continue
			}
			return nil, fmt.Errorf("failed to stop container %q of sandbox %q: %v", container.ID, podSandboxID, err)
		}
		log.With(ctx).Infof("success to stop container %q of sandbox %q", container.ID, podSandboxID)
	}

	// Teardown network of the legacy dockershim style pod, if it is not in host network mode.
	if sandboxNetworkMode(sandboxMeta.Config) != runtime.NamespaceMode_NODE && sandboxMeta.NetNS == "" {
		container, err := c.ContainerMgr.Get(ctx, podSandboxID)
		if err != nil {
			return nil, err
		}

		// sandboxMeta.NetNS is empty in this branch, so the namespace path
		// comes from the container; report that same path on failure.
		netnsPath := containerNetns(container)
		if err = c.teardownNetwork(podSandboxID, netnsPath, sandboxMeta.Config); err != nil {
			return nil, fmt.Errorf("failed to teardown network of sandbox %s, ns path %s: %v", podSandboxID, netnsPath, err)
		}
	}

	// Stop the sandbox container.
	err = c.ContainerMgr.Stop(ctx, podSandboxID, defaultStopTimeout)
	// if the sandbox container has been removed by 'pouch rm', treat this situation as success
	// in order to teardown the network.
	if err != nil {
		if !errtypes.IsNotfound(err) {
			return nil, fmt.Errorf("failed to stop sandbox %q: %v", podSandboxID, err)
		}
		log.With(ctx).Warningf("sandbox container %q not found", podSandboxID)
	}

	// After container stop, no one refer the net namespace, do the clean up job.
	if sandboxNetworkMode(sandboxMeta.Config) != runtime.NamespaceMode_NODE && sandboxMeta.NetNS != "" {
		if err := c.teardownNetwork(podSandboxID, sandboxMeta.NetNS, sandboxMeta.Config); err != nil {
			return nil, fmt.Errorf("failed to teardown network of sandbox %s, ns path %s: %v", podSandboxID, sandboxMeta.NetNS, err)
		}

		if err := c.CniMgr.CloseNetNS(sandboxMeta.NetNS); err != nil {
			return nil, fmt.Errorf("failed to close net ns %s of sandbox %q: %v", sandboxMeta.NetNS, podSandboxID, err)
		}

		if err := c.CniMgr.RemoveNetNS(sandboxMeta.NetNS); err != nil {
			return nil, fmt.Errorf("failed to remove net ns %s of sandbox %q: %v", sandboxMeta.NetNS, podSandboxID, err)
		}
	}

	metrics.PodSuccessActionsCounter.WithLabelValues(label).Inc()

	return &runtime.StopPodSandboxResponse{}, nil
}
// RemovePodSandbox removes the sandbox. If there are running containers in the
// sandbox, they should be forcibly removed.
//
// The steps are: force-remove every container labelled with the sandbox ID,
// force-remove the sandbox container itself, delete the sandbox root
// directory, and finally delete the sandbox metadata from the SandboxStore.
// Containers that are already gone are logged and skipped; any other failure
// aborts the call with an error. Action counter/timer metrics are always
// recorded; the success counter only on success.
func (c *CriManager) RemovePodSandbox(ctx context.Context, r *runtime.RemovePodSandboxRequest) (*runtime.RemovePodSandboxResponse, error) {
	label := util_metrics.ActionRemoveLabel
	defer func(start time.Time) {
		metrics.PodActionsCounter.WithLabelValues(label).Inc()
		metrics.PodActionsTimer.WithLabelValues(label).Observe(time.Since(start).Seconds())
	}(time.Now())

	podSandboxID := r.GetPodSandboxId()

	// Select every container labelled as belonging to this sandbox.
	opts := &mgr.ContainerListOption{All: true}
	opts.FilterFunc = func(ctr *mgr.Container) bool {
		return ctr.Config.Labels[sandboxIDLabelKey] == podSandboxID
	}

	containers, err := c.ContainerMgr.List(ctx, opts)
	if err != nil {
		return nil, fmt.Errorf("failed to remove sandbox %q: %v", podSandboxID, err)
	}

	// Remove all containers in the sandbox.
	for _, container := range containers {
		if err := c.ContainerMgr.Remove(ctx, container.ID, &apitypes.ContainerRemoveOptions{Volumes: true, Force: true}); err != nil {
			if errtypes.IsNotfound(err) {
				log.With(ctx).Warningf("container %q of sandbox %q not found", container.ID, podSandboxID)
				continue
			}
			return nil, fmt.Errorf("failed to remove container %q of sandbox %q: %v", container.ID, podSandboxID, err)
		}
		log.With(ctx).Infof("success to remove container %q of sandbox %q", container.ID, podSandboxID)
	}

	// Remove the sandbox container.
	if err := c.ContainerMgr.Remove(ctx, podSandboxID, &apitypes.ContainerRemoveOptions{Volumes: true, Force: true}); err != nil {
		if !errtypes.IsNotfound(err) {
			return nil, fmt.Errorf("failed to remove sandbox %q: %v", podSandboxID, err)
		}
		log.With(ctx).Warningf("sandbox container %q not found", podSandboxID)
	}

	// Cleanup the sandbox root directory.
	sandboxRootDir := path.Join(c.SandboxBaseDir, podSandboxID)
	if err := os.RemoveAll(sandboxRootDir); err != nil {
		return nil, fmt.Errorf("failed to remove root directory %q: %v", sandboxRootDir, err)
	}

	// The metadata is keyed by the sandbox ID, so report the ID (not the
	// directory path) when its removal fails.
	if err := c.SandboxStore.Remove(podSandboxID); err != nil {
		return nil, fmt.Errorf("failed to remove meta %q: %v", podSandboxID, err)
	}

	metrics.PodSuccessActionsCounter.WithLabelValues(label).Inc()

	return &runtime.RemovePodSandboxResponse{}, nil
}
// PodSandboxStatus returns the status of the PodSandbox.
//
// The sandbox metadata is read from the SandboxStore. A sandbox whose metadata
// has no Config yet (partially created) is rejected with an error. If the
// metadata exists but the container is not found, a status with state
// SANDBOX_NOTFOUND is returned instead of an error. Otherwise the state is
// SANDBOX_READY when the container is running and SANDBOX_NOTREADY when it is
// not. The IP is looked up through the CNI manager unless the sandbox uses
// the host network, and is overridden by the passthru annotation when set.
// Action counter/timer metrics are always recorded; the success counter only
// when the full status is returned.
func (c *CriManager) PodSandboxStatus(ctx context.Context, r *runtime.PodSandboxStatusRequest) (*runtime.PodSandboxStatusResponse, error) {
	label := util_metrics.ActionStatusLabel
	defer func(start time.Time) {
		metrics.PodActionsCounter.WithLabelValues(label).Inc()
		metrics.PodActionsTimer.WithLabelValues(label).Observe(time.Since(start).Seconds())
	}(time.Now())

	podSandboxID := r.GetPodSandboxId()

	res, err := c.SandboxStore.Get(podSandboxID)
	if err != nil {
		return nil, fmt.Errorf("failed to get metadata of %q from SandboxStore: %v", podSandboxID, err)
	}
	sandboxMeta := res.(*metatypes.SandboxMeta)

	// partially created sandbox.
	// kubelet won't call this method because the partially created sandbox
	// are removed from ListPodSandbox interface.
	if sandboxMeta.Config == nil {
		// err is nil here, so it is not part of the message.
		return nil, fmt.Errorf("failed to get status of partially sandbox %q", podSandboxID)
	}

	sandbox, err := c.ContainerMgr.Get(ctx, podSandboxID)
	if err != nil {
		if errtypes.IsNotfound(err) {
			// Metadata exists but the container is gone: report NOTFOUND.
			return &runtime.PodSandboxStatusResponse{
				Status: &runtime.PodSandboxStatus{
					Id:        podSandboxID,
					State:     runtime.PodSandboxState_SANDBOX_NOTFOUND,
					Metadata:  sandboxMeta.Config.Metadata,
					CreatedAt: 1,
				},
			}, nil
		}
		return nil, fmt.Errorf("failed to get status of sandbox %q: %v", podSandboxID, err)
	}

	// Parse the timestamps.
	createdAt, err := toCriTimestamp(sandbox.Created)
	if err != nil {
		return nil, fmt.Errorf("failed to parse timestamp for sandbox %q: %v", podSandboxID, err)
	}

	// Translate container to sandbox state.
	state := runtime.PodSandboxState_SANDBOX_NOTREADY
	if sandbox.State.Status == apitypes.StatusRunning {
		state = runtime.PodSandboxState_SANDBOX_READY
	}

	labels, annotations := extractLabels(sandbox.Config.Labels)

	nsOpts := sandboxMeta.Config.GetLinux().GetSecurityContext().GetNamespaceOptions()
	hostNet := nsOpts.GetNetwork() == runtime.NamespaceMode_NODE

	var ip string
	// No need to get ip for host network mode.
	if !hostNet {
		ip, err = c.CniMgr.GetPodNetworkStatus(containerNetns(sandbox))
		if err != nil {
			// Maybe the pod has been stopped.
			log.With(ctx).Warnf("failed to get ip of sandbox %q: %v", podSandboxID, err)
		}
	}

	// A passthru sandbox reports the IP given in its annotations.
	if v, exist := annotations[anno.PassthruKey]; exist && v == "true" {
		ip = annotations[anno.PassthruIP]
	}

	status := &runtime.PodSandboxStatus{
		Id:          podSandboxID,
		State:       state,
		CreatedAt:   createdAt,
		Metadata:    sandboxMeta.Config.Metadata,
		Labels:      labels,
		Annotations: annotations,
		Network:     &runtime.PodSandboxNetworkStatus{Ip: ip},
		Linux: &runtime.LinuxPodSandboxStatus{
			Namespaces: &runtime.Namespace{
				Options: &runtime.NamespaceOption{
					Network: nsOpts.GetNetwork(),
					Pid:     nsOpts.GetPid(),
					Ipc:     nsOpts.GetIpc(),
				},
			},
		},
	}

	metrics.PodSuccessActionsCounter.WithLabelValues(label).Inc()

	return &runtime.PodSandboxStatusResponse{Status: status}, nil
}
// ListPodSandbox returns a list of Sandbox.
//
// Every entry of the SandboxStore is considered. When the matching container
// can be fetched it is converted to a CRI sandbox (entries that fail to
// convert are logged and skipped). When it cannot be fetched, the entry is
// reported with state SANDBOX_NOTFOUND from its stored config, unless the
// metadata is unusable or has no config (partially created sandbox), in which
// case it is left out. The request filter is applied to the collected list.
func (c *CriManager) ListPodSandbox(ctx context.Context, r *runtime.ListPodSandboxRequest) (*runtime.ListPodSandboxResponse, error) {
	label := util_metrics.ActionListLabel
	began := time.Now()
	defer func() {
		metrics.PodActionsCounter.WithLabelValues(label).Inc()
		metrics.PodActionsTimer.WithLabelValues(label).Observe(time.Since(began).Seconds())
	}()

	stored, err := c.SandboxStore.List()
	if err != nil {
		return nil, fmt.Errorf("failed to list sandbox from SandboxStore: %v", err)
	}

	collected := make([]*runtime.PodSandbox, 0, len(stored))
	for id, raw := range stored {
		ctr, getErr := c.ContainerMgr.Get(ctx, id)
		if getErr == nil {
			// Container is available: translate it directly.
			item, convErr := toCriSandbox(ctr)
			if convErr != nil {
				log.With(ctx).Warningf("failed to parse state of sandbox %q: %v", id, convErr)
				continue
			}
			collected = append(collected, item)
			continue
		}

		// metadata exists but container not found
		sm, ok := raw.(*metatypes.SandboxMeta)
		usable := ok && sm != nil && sm.Config != nil
		if !usable {
			// partially created sandbox.
			continue
		}

		collected = append(collected, &runtime.PodSandbox{
			Id:          id,
			Metadata:    sm.Config.Metadata,
			State:       runtime.PodSandboxState_SANDBOX_NOTFOUND,
			Labels:      sm.Config.Labels,
			Annotations: sm.Config.Annotations,
			CreatedAt:   1,
		})
	}

	items := filterCRISandboxes(collected, r.GetFilter())

	metrics.PodSuccessActionsCounter.WithLabelValues(label).Inc()

	return &runtime.ListPodSandboxResponse{Items: items}, nil
}
// CreateContainer creates a new container in the given PodSandbox.
//
// The sandbox container and its stored metadata must both exist. The container
// create config is assembled from the CRI container config: labels (including
// the container type, sandbox ID and, if set, log path), spec annotations
// compatible with both kubernetes and cri-o, resources, mounts, devices and
// the bind mounts generated from the sandbox root directory. When a CriPlugin
// is configured, its PreCreateContainer hook may adjust the config before the
// container is created. If a log path is configured, the CRI log is attached
// to the new container.
//
// Once the container exists, any later failure (tracked through the named
// result retErr) removes it again so no half-configured container is left
// behind. Action counter/timer metrics are always recorded; the success
// counter only on success.
//
// It returns the ID of the created container, or an error if the request
// carries no metadata or any step fails.
func (c *CriManager) CreateContainer(ctx context.Context, r *runtime.CreateContainerRequest) (_ *runtime.CreateContainerResponse, retErr error) {
	label := util_metrics.ActionCreateLabel
	defer func(start time.Time) {
		metrics.ContainerActionsCounter.WithLabelValues(label).Inc()
		metrics.ContainerActionsTimer.WithLabelValues(label).Observe(time.Since(start).Seconds())
	}(time.Now())

	config := r.GetConfig()
	if config.GetMetadata() == nil {
		return nil, fmt.Errorf("container metadata required")
	}

	sandboxConfig := r.GetSandboxConfig()
	podSandboxID := r.GetPodSandboxId()

	// get sandbox
	sandbox, err := c.ContainerMgr.Get(ctx, podSandboxID)
	if err != nil {
		return nil, fmt.Errorf("failed to get sandbox %q: %v", podSandboxID, err)
	}

	res, err := c.SandboxStore.Get(podSandboxID)
	if err != nil {
		return nil, fmt.Errorf("failed to get metadata of %q from SandboxStore: %v", podSandboxID, err)
	}
	sandboxMeta := res.(*metatypes.SandboxMeta)
	sandboxMeta.NetNS = containerNetns(sandbox)

	labels := makeLabels(config.GetLabels(), config.GetAnnotations())
	// Apply the container type label.
	labels[containerTypeLabelKey] = containerTypeLabelContainer
	// Write the sandbox ID in the labels.
	labels[sandboxIDLabelKey] = podSandboxID
	// Get container log.
	var logPath string
	if config.GetLogPath() != "" {
		logPath = filepath.Join(sandboxConfig.GetLogDirectory(), config.GetLogPath())
		labels[containerLogPathLabelKey] = logPath
	}

	// compatible with both kubernetes and cri-o annotations
	specAnnotation := make(map[string]string)
	specAnnotation[anno.CRIOContainerType] = anno.ContainerTypeContainer
	specAnnotation[anno.ContainerType] = anno.ContainerTypeContainer
	specAnnotation[anno.CRIOSandboxName] = podSandboxID
	specAnnotation[anno.CRIOSandboxID] = podSandboxID
	specAnnotation[anno.SandboxID] = podSandboxID

	resources := r.GetConfig().GetLinux().GetResources()
	createConfig := &apitypes.ContainerCreateConfig{
		ContainerConfig: apitypes.ContainerConfig{
			Entrypoint: config.GetCommand(),
			Cmd:        config.GetArgs(),
			Env:        generateEnvList(config.GetEnvs()),
			Image:      config.GetImage().GetImage(),
			WorkingDir: config.GetWorkingDir(),
			Labels:     labels,
			// Interactive containers:
			OpenStdin:      config.GetStdin(),
			StdinOnce:      config.GetStdinOnce(),
			Tty:            config.GetTty(),
			SpecAnnotation: specAnnotation,
			NetPriority:    config.GetNetPriority(),
			DiskQuota:      resources.GetDiskQuota(),
			QuotaID:        config.GetQuotaId(),
		},
		HostConfig: &apitypes.HostConfig{
			Binds:     generateMountBindings(config.GetMounts()),
			Resources: parseResourcesFromCRI(resources),
		},
		NetworkingConfig: &apitypes.NetworkingConfig{},
	}

	err = c.updateCreateConfig(createConfig, config, sandboxConfig, sandboxMeta)
	if err != nil {
		return nil, err
	}

	// Bindings to overwrite the container's /etc/resolv.conf, /etc/hosts etc.
	sandboxRootDir := path.Join(c.SandboxBaseDir, podSandboxID)
	createConfig.HostConfig.Binds = append(createConfig.HostConfig.Binds, generateContainerMounts(sandboxRootDir)...)

	var devices []*apitypes.DeviceMapping
	for _, device := range config.GetDevices() {
		devices = append(devices, &apitypes.DeviceMapping{
			PathOnHost:        device.GetHostPath(),
			PathInContainer:   device.GetContainerPath(),
			CgroupPermissions: device.GetPermissions(),
		})
	}
	createConfig.HostConfig.Resources.Devices = devices

	containerName := makeContainerName(sandboxConfig, config)

	// call cri plugin to update create config
	if c.CriPlugin != nil {
		if err := c.CriPlugin.PreCreateContainer(ctx, createConfig, sandboxMeta); err != nil {
			return nil, err
		}
	}

	createResp, err := c.ContainerMgr.Create(ctx, containerName, createConfig)
	if err != nil {
		return nil, fmt.Errorf("failed to create container for sandbox %q: %v", podSandboxID, err)
	}

	containerID := createResp.ID

	defer func() {
		// If anything after the creation failed, clean up the container.
		// The check is on the named result so that errors held in shadowed
		// variables (such as the AttachCRILog error below) are seen as well.
		if retErr != nil {
			removeErr := c.ContainerMgr.Remove(ctx, containerID, &apitypes.ContainerRemoveOptions{Volumes: true, Force: true})
			if removeErr != nil {
				log.With(ctx).Errorf("failed to remove the container when creating container failed: %v", removeErr)
			}
		}
	}()

	if logPath != "" {
		if err := c.ContainerMgr.AttachCRILog(ctx, containerID, logPath); err != nil {
			return nil, err
		}
	}

	metrics.ContainerSuccessActionsCounter.WithLabelValues(label).Inc()

	return &runtime.CreateContainerResponse{ContainerId: containerID}, nil
}
// StartContainer starts the container named by the request's container ID.
//
// Every call is counted and timed under the start action label; the success
// counter is bumped only when the container manager reports no error.
func (c *CriManager) StartContainer(ctx context.Context, r *runtime.StartContainerRequest) (*runtime.StartContainerResponse, error) {
	action := util_metrics.ActionStartLabel
	begin := time.Now()
	defer func() {
		// Recorded on every exit path, successful or not.
		metrics.ContainerActionsCounter.WithLabelValues(action).Inc()
		metrics.ContainerActionsTimer.WithLabelValues(action).Observe(time.Since(begin).Seconds())
	}()

	id := r.GetContainerId()
	if startErr := c.ContainerMgr.Start(ctx, id, &apitypes.ContainerStartOptions{}); startErr != nil {
		return nil, fmt.Errorf("failed to start container %q: %v", id, startErr)
	}

	metrics.ContainerSuccessActionsCounter.WithLabelValues(action).Inc()
	return &runtime.StartContainerResponse{}, nil
}
// StopContainer stops a running container, passing the request's timeout to
// the container manager as the grace period.
//
// Every call is counted and timed under the stop action label; the success
// counter is bumped only when the container manager reports no error.
func (c *CriManager) StopContainer(ctx context.Context, r *runtime.StopContainerRequest) (*runtime.StopContainerResponse, error) {
	action := util_metrics.ActionStopLabel
	begin := time.Now()
	defer func() {
		// Recorded on every exit path, successful or not.
		metrics.ContainerActionsCounter.WithLabelValues(action).Inc()
		metrics.ContainerActionsTimer.WithLabelValues(action).Observe(time.Since(begin).Seconds())
	}()

	id := r.GetContainerId()
	if stopErr := c.ContainerMgr.Stop(ctx, id, r.GetTimeout()); stopErr != nil {
		return nil, fmt.Errorf("failed to stop container %q: %v", id, stopErr)
	}

	metrics.ContainerSuccessActionsCounter.WithLabelValues(action).Inc()
	return &runtime.StopContainerResponse{}, nil
}
// RemoveContainer removes the container named by the request's container ID.
// The removal is requested with both the Volumes and Force options enabled.
//
// Every call is counted and timed under the remove action label; the success
// counter is bumped only when the container manager reports no error.
func (c *CriManager) RemoveContainer(ctx context.Context, r *runtime.RemoveContainerRequest) (*runtime.RemoveContainerResponse, error) {
	action := util_metrics.ActionRemoveLabel
	begin := time.Now()
	defer func() {
		// Recorded on every exit path, successful or not.
		metrics.ContainerActionsCounter.WithLabelValues(action).Inc()
		metrics.ContainerActionsTimer.WithLabelValues(action).Observe(time.Since(begin).Seconds())
	}()

	id := r.GetContainerId()
	removeOpts := &apitypes.ContainerRemoveOptions{Volumes: true, Force: true}
	removeErr := c.ContainerMgr.Remove(ctx, id, removeOpts)
	if removeErr != nil {
		return nil, fmt.Errorf("failed to remove container %q: %v", id, removeErr)
	}

	metrics.ContainerSuccessActionsCounter.WithLabelValues(action).Inc()
	return &runtime.RemoveContainerResponse{}, nil
}
// ListContainers lists the containers that match the request's filter.
//
// The container manager is asked for all containers whose type label marks
// them as a regular container. Each one is converted to its CRI form, and a
// container that fails to convert is logged at warning level and left out
// rather than failing the call. The request filter is applied to the
// converted list.
func (c *CriManager) ListContainers(ctx context.Context, r *runtime.ListContainersRequest) (*runtime.ListContainersResponse, error) {
	action := util_metrics.ActionListLabel
	begin := time.Now()
	defer func() {
		// Recorded on every exit path, successful or not.
		metrics.ContainerActionsCounter.WithLabelValues(action).Inc()
		metrics.ContainerActionsTimer.WithLabelValues(action).Observe(time.Since(begin).Seconds())
	}()

	// Keep only entries labelled with the container type.
	listOpts := &mgr.ContainerListOption{
		All: true,
		FilterFunc: func(candidate *mgr.Container) bool {
			return candidate.Config.Labels[containerTypeLabelKey] == containerTypeLabelContainer
		},
	}

	found, err := c.ContainerMgr.List(ctx, listOpts)
	if err != nil {
		return nil, fmt.Errorf("failed to list container: %v", err)
	}

	converted := make([]*runtime.Container, 0, len(found))
	for i := range found {
		item := found[i]
		criContainer, convErr := toCriContainer(item)
		if convErr == nil {
			converted = append(converted, criContainer)
			continue
		}
		// A single untranslatable container must not fail the whole listing.
		log.With(ctx).Warnf("failed to translate container %v to cri container in ListContainers: %v", item.ID, convErr)
	}

	matched := filterCRIContainers(converted, r.GetFilter())

	metrics.ContainerSuccessActionsCounter.WithLabelValues(action).Inc()
	return &runtime.ListContainersResponse{Containers: matched}, nil
}
// ContainerStatus inspects the container and returns the status.
func (c *CriManager) ContainerStatus(ctx context.Context, r *runtime.ContainerStatusRequest) (*runtime.ContainerStatusResponse, error) {
label := util_metrics.ActionStatusLabel
defer func(start time.Time) {
metrics.ContainerActionsCounter.WithLabelValues(label).Inc()
metrics.ContainerActionsTimer.WithLabelValues(label).Observe(time.Since(start).Seconds())
}(time.Now())
id := r.GetContainerId()
container, err := c.ContainerMgr.Get(ctx, id)
if err != nil {
return nil, fmt.Errorf("failed to get container status of %q: %v", id, err)
}
// Parse the timestamps.
var createdAt, startedAt, finishedAt int64
for _, item := range []struct {
t *int64
s string
}{
{t: &createdAt, s: container.Created},
{t: &startedAt, s: container.State.StartedAt},
{t: &finishedAt, s: container.State.FinishedAt},
} {
*item.t, err = toCriTimestamp(item.s)
if err != nil {
return nil, fmt.Errorf("failed to parse timestamp for container %q: %v", id, err)
}
}
// Convert the mounts.
mounts := make([]*runtime.Mount, 0, len(container.Mounts))
for _, m := range container.Mounts {
mounts = append(mounts, &runtime.Mount{
HostPath: m.Source,
ContainerPath: m.Destination,
Readonly: !m.RW,
Name: m.Name,
// Note: can't set SeLinuxRelabel.
})