forked from k3s-io/kubernetes
-
Notifications
You must be signed in to change notification settings - Fork 0
/
whitelist.go
171 lines (153 loc) · 5.2 KB
/
whitelist.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package sysctl
import (
"fmt"
"strings"
"k8s.io/api/core/v1"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
"k8s.io/kubernetes/pkg/apis/core/validation"
extvalidation "k8s.io/kubernetes/pkg/apis/extensions/validation"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
)
const (
AnnotationInvalidReason = "InvalidSysctlAnnotation"
ForbiddenReason = "SysctlForbidden"
)
// SafeSysctlWhitelist returns the whitelist of safe sysctls and safe sysctl patterns (ending in *).
//
// A sysctl is called safe iff
// - it is namespaced in the container or the pod
// - it is isolated, i.e. has no influence on any other pod on the same node.
func SafeSysctlWhitelist() []string {
return []string{
"kernel.shm_rmid_forced",
"net.ipv4.ip_local_port_range",
"net.ipv4.tcp_syncookies",
}
}
// Whitelist provides a list of allowed sysctls and sysctl patterns (ending in *)
// and a function to check whether a given sysctl matches this list.
type Whitelist interface {
// Validate checks that all sysctls given in a v1.SysctlsPodAnnotationKey annotation
// are valid according to the whitelist.
Validate(pod *v1.Pod) error
}
// patternWhitelist takes a list of sysctls or sysctl patterns (ending in *) and
// checks validity via a sysctl and prefix map, rejecting those which are not known
// to be namespaced.
type patternWhitelist struct {
sysctls map[string]Namespace
prefixes map[string]Namespace
annotationKey string
}
var _ lifecycle.PodAdmitHandler = &patternWhitelist{}
// NewWhitelist creates a new Whitelist from a list of sysctls and sysctl pattern (ending in *).
func NewWhitelist(patterns []string, annotationKey string) (*patternWhitelist, error) {
w := &patternWhitelist{
sysctls: map[string]Namespace{},
prefixes: map[string]Namespace{},
annotationKey: annotationKey,
}
for _, s := range patterns {
if !extvalidation.IsValidSysctlPattern(s) {
return nil, fmt.Errorf("sysctl %q must have at most %d characters and match regex %s",
s,
validation.SysctlMaxLength,
extvalidation.SysctlPatternFmt,
)
}
if strings.HasSuffix(s, "*") {
prefix := s[:len(s)-1]
ns := NamespacedBy(prefix)
if ns == UnknownNamespace {
return nil, fmt.Errorf("the sysctls %q are not known to be namespaced", s)
}
w.prefixes[prefix] = ns
} else {
ns := NamespacedBy(s)
if ns == UnknownNamespace {
return nil, fmt.Errorf("the sysctl %q are not known to be namespaced", s)
}
w.sysctls[s] = ns
}
}
return w, nil
}
// validateSysctl checks that a sysctl is whitelisted because it is known
// to be namespaced by the Linux kernel. Note that being whitelisted is required, but not
// sufficient: the container runtime might have a stricter check and refuse to launch a pod.
//
// The parameters hostNet and hostIPC are used to forbid sysctls for pod sharing the
// respective namespaces with the host. This check is only possible for sysctls on
// the static default whitelist, not those on the custom whitelist provided by the admin.
func (w *patternWhitelist) validateSysctl(sysctl string, hostNet, hostIPC bool) error {
nsErrorFmt := "%q not allowed with host %s enabled"
if ns, found := w.sysctls[sysctl]; found {
if ns == IpcNamespace && hostIPC {
return fmt.Errorf(nsErrorFmt, sysctl, ns)
}
if ns == NetNamespace && hostNet {
return fmt.Errorf(nsErrorFmt, sysctl, ns)
}
return nil
}
for p, ns := range w.prefixes {
if strings.HasPrefix(sysctl, p) {
if ns == IpcNamespace && hostIPC {
return fmt.Errorf(nsErrorFmt, sysctl, ns)
}
if ns == NetNamespace && hostNet {
return fmt.Errorf(nsErrorFmt, sysctl, ns)
}
return nil
}
}
return fmt.Errorf("%q not whitelisted", sysctl)
}
// Admit checks that all sysctls given in a v1.SysctlsPodAnnotationKey annotation
// are valid according to the whitelist.
func (w *patternWhitelist) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult {
pod := attrs.Pod
a := pod.Annotations[w.annotationKey]
if a == "" {
return lifecycle.PodAdmitResult{
Admit: true,
}
}
sysctls, err := v1helper.SysctlsFromPodAnnotation(a)
if err != nil {
return lifecycle.PodAdmitResult{
Admit: false,
Reason: AnnotationInvalidReason,
Message: fmt.Sprintf("invalid %s annotation: %v", w.annotationKey, err),
}
}
var hostNet, hostIPC bool
if pod.Spec.SecurityContext != nil {
hostNet = pod.Spec.HostNetwork
hostIPC = pod.Spec.HostIPC
}
for _, s := range sysctls {
if err := w.validateSysctl(s.Name, hostNet, hostIPC); err != nil {
return lifecycle.PodAdmitResult{
Admit: false,
Reason: ForbiddenReason,
Message: fmt.Sprintf("forbidden sysctl: %v", err),
}
}
}
return lifecycle.PodAdmitResult{
Admit: true,
}
}