forked from projectcalico/calico
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.go
81 lines (69 loc) · 3.13 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
//go:build cgo
// Copyright (c) 2022 Tigera, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"os"
"syscall"
)
// As more systems adopt cgroup2, k8s started to containerize each pod in a separate cgroup.
// This change prevents felix from attaching CTLB programs to cgroup ns correctly. To fix the issue,
// we need to mount root cgroup at /run/calico/cgroup (where felix expects it), not the one
// allocated by k8s to calico-node. This binary takes the following steps to solve it:
// - Enter the namespace root before mounting cgroup2 fs. (Usually, /proc/1/ns points to
// the root of all namespaces, however, we mount /proc/1 on host at /initproc on calico-node pod,
// so /initproc/ns is the root of namespaces.)
// - Mount root cgroups fs at /run/calico/cgroup.
// The following C code is executed as a cgo constructor which runs before the main function.
// The reason for this behavior is to set cgroup and mount namespace correctly, before mounting
// cgroup2 fs in the main function. Mount ns can only be changed in a single-thread process,
// so we need to change it by exploiting cgo constructor before Go runtime starts new threads.
// In addition, normal frameworks, like logrus, are not used in the go code to prevent:
// - any unexpected initialisation logic. This is important for setting mount ns
// correctly, as mentioned above.
// - unnecessary increase of the binary size, which currently is less than 2MB.
/*
#define _GNU_SOURCE
#include <sched.h>
#include <fcntl.h>
__attribute__((constructor)) void set_namespaces(void) {
// open /initproc/ns/cgroup, which is equivalent to /proc/1/ns/cgroup on host.
// Then run setns syscall to change the cgroup namespace to this value.
setns(open("/initproc/ns/cgroup", O_RDONLY, 0), CLONE_NEWCGROUP);
// open /initproc/ns/mnt, which is equivalent to /proc/1/ns/mnt on host.
// Then run setns syscall to change the mount namespace to this value.
setns(open("/initproc/ns/mnt", O_RDONLY, 0), CLONE_NEWNS);
} */
import "C"
func main() {
if len(os.Args) < 2 {
fmt.Printf("Usage: %s <mountpoint>\n", os.Args[0])
os.Exit(1)
}
mountPoint := os.Args[1]
fmt.Println("Trying to mount root cgroup fs.")
err := os.MkdirAll(mountPoint, 0700)
if err != nil {
fmt.Printf("Failed to prepare mount point: %v. err: %v.\n", mountPoint, err)
os.Exit(1)
}
fmt.Printf("Mount point %s is ready for mounting root cgroup2 fs.\n", mountPoint)
err = syscall.Mount("none", mountPoint, "cgroup2", 0, "")
if err != nil {
fmt.Printf("Failed to mount cgroup2 filesystem. err: %v.\n", err)
os.Exit(1)
}
fmt.Println("Successfully mounted root cgroup2 fs.")
}