/
spec.go
360 lines (327 loc) · 11.2 KB
/
spec.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
/*
Copyright 2018 The Rook Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package mgr
import (
"fmt"
"os"
"strconv"
"strings"
rookcephv1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1"
rookalpha "github.com/rook/rook/pkg/apis/rook.io/v1alpha2"
"github.com/rook/rook/pkg/daemon/ceph/client"
"github.com/rook/rook/pkg/operator/ceph/cluster/mon"
"github.com/rook/rook/pkg/operator/ceph/config"
"github.com/rook/rook/pkg/operator/ceph/config/keyring"
opspec "github.com/rook/rook/pkg/operator/ceph/spec"
cephver "github.com/rook/rook/pkg/operator/ceph/version"
"github.com/rook/rook/pkg/operator/k8sutil"
apps "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
)
// podIPEnvVar is the name of the environment variable that carries the pod IP
// in the mgr containers; it is handed to k8sutil.PodIPEnvVar when the
// container environment is assembled.
const podIPEnvVar = "ROOK_POD_IP"
// makeDeployment assembles the Deployment for the mgr daemon described by
// mgrConfig. The pod runs a chown init container followed by the ceph-mgr
// container; when the HTTP bind fix is required, two more init containers set
// server_addr for the dashboard and prometheus modules. The Deployment uses a
// single replica with the Recreate strategy and is owned by the cluster's
// owner reference.
func (c *Cluster) makeDeployment(mgrConfig *mgrConfig) *apps.Deployment {
	logger.Debugf("mgrConfig: %+v", mgrConfig)

	// Pod metadata and pod spec are built separately and only combined into
	// the pod template once every mutation below has been applied.
	podMeta := metav1.ObjectMeta{
		Name:   mgrConfig.ResourceName,
		Labels: c.getPodLabels(mgrConfig.DaemonID),
	}
	pod := v1.PodSpec{
		InitContainers:     []v1.Container{c.makeChownInitContainer(mgrConfig)},
		Containers:         []v1.Container{c.makeMgrDaemonContainer(mgrConfig)},
		ServiceAccountName: serviceAccountName,
		RestartPolicy:      v1.RestartPolicyAlways,
		Volumes:            opspec.DaemonVolumes(mgrConfig.DataPathMap, mgrConfig.ResourceName),
		HostNetwork:        c.Network.IsHost(),
		PriorityClassName:  c.priorityClassName,
	}

	// Replace the default unreachable-node toleration.
	k8sutil.AddUnreachableNodeToleration(&pod)

	// When the fix is needed, extra init containers explicitly set the
	// server_addr Ceph option to the pod's IP address. When it is not needed,
	// leftovers from earlier versions are cleaned up elsewhere; see
	// clearHTTPBindFix.
	if c.needHTTPBindFix() {
		for _, module := range []string{"dashboard", "prometheus"} {
			pod.InitContainers = append(pod.InitContainers,
				c.makeSetServerAddrInitContainer(mgrConfig, module))
		}
		// The "ceph config set" commands need the admin keyring.
		pod.Volumes = append(pod.Volumes, keyring.Volume().Admin())
	}

	if c.Network.IsHost() {
		pod.DNSPolicy = v1.DNSClusterFirstWithHostNet
	}

	c.annotations.ApplyToObjectMeta(&podMeta)
	// applyPrometheusAnnotations always returns nil, so its result is dropped.
	_ = c.applyPrometheusAnnotations(&podMeta)
	c.placement.ApplyToPodSpec(&pod)

	var replicas int32 = 1
	// Each label map below is a fresh getPodLabels result so that later
	// mutation of one (e.g. by the version-label helpers) cannot leak into
	// the others.
	deployment := &apps.Deployment{
		ObjectMeta: metav1.ObjectMeta{
			Name:      mgrConfig.ResourceName,
			Namespace: c.Namespace,
			Labels:    c.getPodLabels(mgrConfig.DaemonID),
		},
		Spec: apps.DeploymentSpec{
			Selector: &metav1.LabelSelector{
				MatchLabels: c.getPodLabels(mgrConfig.DaemonID),
			},
			Template: v1.PodTemplateSpec{
				ObjectMeta: podMeta,
				Spec:       pod,
			},
			Replicas: &replicas,
			Strategy: apps.DeploymentStrategy{
				Type: apps.RecreateDeploymentStrategyType,
			},
		},
	}

	k8sutil.AddRookVersionLabelToDeployment(deployment)
	opspec.AddCephVersionLabelToDeployment(c.clusterInfo.CephVersion, deployment)
	k8sutil.SetOwnerRef(&deployment.ObjectMeta, &c.ownerRef)
	return deployment
}
// needHTTPBindFix reports whether the running Ceph version still requires the
// server_addr workaround. It is not required on Mimic releases at or above
// 13.2.6, nor on any release at or above 14.1.1; every other version needs it.
func (c *Cluster) needHTTPBindFix() bool {
	running := c.clusterInfo.CephVersion
	mimicFix := cephver.CephVersion{Major: 13, Minor: 2, Extra: 6}
	laterFix := cephver.CephVersion{Major: 14, Minor: 1, Extra: 1}

	switch {
	case running.IsMimic() && running.IsAtLeast(mimicFix):
		return false
	case running.IsAtLeast(laterFix):
		return false
	}
	return true
}
// clearHTTPBindFix removes any server_addr settings left behind by the HTTP
// bind fix. When the fix is no longer needed but the cluster was upgraded from
// a version that had it applied, the stale option would still hold an old IP.
// Clearing the option lets Ceph bind to its default ANYADDR address. Because
// the version being upgraded _from_ is unknown, this (a) always runs and
// (b) clears every form of the configuration key. Once the minimum supported
// version of Rook contains the fix, all of this can be removed.
//
// The work is done at most once per Cluster value (tracked by
// c.appliedHttpBind), which in practice means once each time the operator
// restarts. Whatever client.MgrSetConfig returns is discarded, so this method
// always returns nil.
func (c *Cluster) clearHTTPBindFix() error {
	if c.appliedHttpBind {
		return nil
	}

	// The key format depends on the version possibly being upgraded from, not
	// on the version currently running.
	upgradeFrom := cephver.Mimic
	modules := []string{"dashboard", "prometheus"}

	for _, daemonID := range c.getDaemonIDs() {
		for _, module := range modules {
			keys := []string{
				fmt.Sprintf("mgr/%s/server_addr", module),
				// format used in v1.0, see
				// https://github.com/rook/rook/commit/11d318fb2f77a6ac9a8f2b9be42c826d3b4a93c3
				fmt.Sprintf("mgr/%s/%s/server_addr", module, daemonID),
			}
			for _, key := range keys {
				client.MgrSetConfig(c.context, c.Namespace, daemonID, upgradeFrom, key, "", false)
			}
		}
	}

	c.appliedHttpBind = true
	return nil
}
// makeChownInitContainer returns the init container produced by
// opspec.ChownCephDataDirsInitContainer for this mgr's data path map, using
// the cluster's Ceph image, the daemon volume mounts, the mgr resource
// requirements and the security context from mon.PodSecurityContext.
func (c *Cluster) makeChownInitContainer(mgrConfig *mgrConfig) v1.Container {
	mounts := opspec.DaemonVolumeMounts(mgrConfig.DataPathMap, mgrConfig.ResourceName)
	return opspec.ChownCephDataDirsInitContainer(
		*mgrConfig.DataPathMap, c.cephVersion.Image, mounts, c.resources, mon.PodSecurityContext())
}
// makeSetServerAddrInitContainer returns an init container that runs the
// "ceph" CLI to point the given mgr module's server_addr at the pod IP.
//
// The arguments following the admin flags are:
//
//	config set mgr.<daemonID> mgr/<module>/<daemonID>/server_addr <pod IP ref> [--force] --verbose
//
// where <pod IP ref> is the container env var reference for ROOK_POD_IP and
// --force is only added when the Ceph version is at least Nautilus.
//
// mgrConfig supplies the daemon ID, data paths and resource name; mgrModule is
// the mgr module name (e.g. "dashboard" or "prometheus") and is lower-cased
// when forming the container name.
func (c *Cluster) makeSetServerAddrInitContainer(mgrConfig *mgrConfig, mgrModule string) v1.Container {
	cfgSetArgs := []string{
		"config", "set",
		fmt.Sprintf("mgr.%s", mgrConfig.DaemonID),
		fmt.Sprintf("mgr/%s/%s/server_addr", mgrModule, mgrConfig.DaemonID),
		opspec.ContainerEnvVarReference(podIPEnvVar),
	}
	if c.clusterInfo.CephVersion.IsAtLeastNautilus() {
		cfgSetArgs = append(cfgSetArgs, "--force")
	}
	cfgSetArgs = append(cfgSetArgs, "--verbose")

	return v1.Container{
		Name:    "init-set-" + strings.ToLower(mgrModule) + "-server-addr",
		Command: []string{"ceph"},
		Args:    append(opspec.AdminFlags(c.clusterInfo), cfgSetArgs...),
		Image:   c.cephVersion.Image,
		// The "ceph config set" command needs the admin keyring in addition
		// to the regular daemon mounts.
		VolumeMounts: append(
			opspec.DaemonVolumeMounts(mgrConfig.DataPathMap, mgrConfig.ResourceName),
			keyring.VolumeMount().Admin(),
		),
		// cephMgrOrchestratorModuleEnvs already contributes the ROOK_POD_IP
		// variable referenced in the args, so it is not added a second time
		// here; a container env list should not repeat a name.
		Env: append(
			opspec.DaemonEnvVars(c.cephVersion.Image),
			c.cephMgrOrchestratorModuleEnvs()...,
		),
		Resources: c.resources,
	}
}
// makeMgrDaemonContainer returns the main "mgr" container, which runs ceph-mgr
// in the foreground. It exposes the mgr (6800), metrics and dashboard ports,
// and its liveness probe issues an HTTP GET on "/" at the metrics port after
// an initial delay of 60 seconds. Unless host networking is enabled, a
// public-addr flag referencing the pod IP env var is appended to the args.
func (c *Cluster) makeMgrDaemonContainer(mgrConfig *mgrConfig) v1.Container {
	args := opspec.DaemonFlags(c.clusterInfo, mgrConfig.DaemonID)
	args = append(args,
		// for ceph-mgr cephfs
		// see https://github.com/ceph/ceph-csi/issues/486 for more details
		config.NewFlag("client-mount-uid", "0"),
		config.NewFlag("client-mount-gid", "0"),
		"--foreground",
	)

	ports := []v1.ContainerPort{
		{Name: "mgr", ContainerPort: 6800, Protocol: v1.ProtocolTCP},
		{Name: "http-metrics", ContainerPort: int32(metricsPort), Protocol: v1.ProtocolTCP},
		{Name: "dashboard", ContainerPort: int32(c.dashboardPort()), Protocol: v1.ProtocolTCP},
	}

	liveness := &v1.Probe{
		Handler: v1.Handler{
			HTTPGet: &v1.HTTPGetAction{
				Path: "/",
				Port: intstr.FromInt(metricsPort),
			},
		},
		InitialDelaySeconds: 60,
	}

	container := v1.Container{
		Name:         "mgr",
		Command:      []string{"ceph-mgr"},
		Args:         args,
		Image:        c.cephVersion.Image,
		VolumeMounts: opspec.DaemonVolumeMounts(mgrConfig.DataPathMap, mgrConfig.ResourceName),
		Ports:        ports,
		Env: append(
			opspec.DaemonEnvVars(c.cephVersion.Image),
			c.cephMgrOrchestratorModuleEnvs()...,
		),
		Resources:       c.resources,
		LivenessProbe:   liveness,
		SecurityContext: mon.PodSecurityContext(),
	}

	// With host networking there is no need for an address that differs from
	// the public one, so the container is returned as is.
	if c.Network.IsHost() {
		return container
	}

	// Otherwise set public-addr to the pod IP, which changes with every new pod.
	container.Args = append(container.Args,
		config.NewFlag("public-addr", opspec.ContainerEnvVarReference(podIPEnvVar)))
	return container
}
// makeMetricsService returns a ClusterIP Service called name in the cluster
// namespace that selects the mgr app pods and exposes the "http-metrics" TCP
// port. The Service is owned by the cluster's owner reference.
func (c *Cluster) makeMetricsService(name string) *v1.Service {
	// The same label set is used both to label the Service and as its selector.
	appLabels := opspec.AppLabels(AppName, c.Namespace)

	metrics := v1.ServicePort{
		Name:     "http-metrics",
		Port:     int32(metricsPort),
		Protocol: v1.ProtocolTCP,
	}

	service := &v1.Service{}
	service.ObjectMeta = metav1.ObjectMeta{
		Name:      name,
		Namespace: c.Namespace,
		Labels:    appLabels,
	}
	service.Spec = v1.ServiceSpec{
		Selector: appLabels,
		Type:     v1.ServiceTypeClusterIP,
		Ports:    []v1.ServicePort{metrics},
	}

	k8sutil.SetOwnerRef(&service.ObjectMeta, &c.ownerRef)
	return service
}
// makeDashboardService returns a ClusterIP Service called "<name>-dashboard"
// in the cluster namespace that selects the mgr app pods and exposes the
// dashboard TCP port. The port is named "https-dashboard" when dashboard SSL
// is enabled and "dashboard" otherwise. The Service is owned by the cluster's
// owner reference.
func (c *Cluster) makeDashboardService(name string) *v1.Service {
	// The same label set is used both to label the Service and as its selector.
	appLabels := opspec.AppLabels(AppName, c.Namespace)

	dashboardPortName := "dashboard"
	if c.dashboard.SSL {
		dashboardPortName = "https-dashboard"
	}

	dashboard := v1.ServicePort{
		Name:     dashboardPortName,
		Port:     int32(c.dashboardPort()),
		Protocol: v1.ProtocolTCP,
	}

	service := &v1.Service{}
	service.ObjectMeta = metav1.ObjectMeta{
		Name:      fmt.Sprintf("%s-dashboard", name),
		Namespace: c.Namespace,
		Labels:    appLabels,
	}
	service.Spec = v1.ServiceSpec{
		Selector: appLabels,
		Type:     v1.ServiceTypeClusterIP,
		Ports:    []v1.ServicePort{dashboard},
	}

	k8sutil.SetOwnerRef(&service.ObjectMeta, &c.ownerRef)
	return service
}
// getPodLabels returns the labels for the mgr daemon named daemonName: the
// set produced by opspec.PodLabels plus an "instance" key holding the daemon
// name, which is retained for legacy usage.
func (c *Cluster) getPodLabels(daemonName string) map[string]string {
	podLabels := opspec.PodLabels(AppName, c.Namespace, "mgr", daemonName)
	podLabels["instance"] = daemonName
	return podLabels
}
// applyPrometheusAnnotations adds the default Prometheus scrape annotations
// ("prometheus.io/scrape" and "prometheus.io/port") to objectMeta, but only
// when the cluster has no annotations configured. It always returns nil.
func (c *Cluster) applyPrometheusAnnotations(objectMeta *metav1.ObjectMeta) error {
	// Configured annotations take the place of the defaults entirely.
	if len(c.annotations) != 0 {
		return nil
	}

	defaults := rookalpha.Annotations{
		"prometheus.io/scrape": "true",
		"prometheus.io/port":   strconv.Itoa(metricsPort),
	}
	defaults.ApplyToObjectMeta(objectMeta)
	return nil
}
// cephMgrOrchestratorModuleEnvs returns the environment variables shared by
// the mgr containers: the operator namespace (read from the operator's own
// environment), the CephCluster CRD version and name, and the pod IP variable.
func (c *Cluster) cephMgrOrchestratorModuleEnvs() []v1.EnvVar {
	return []v1.EnvVar{
		{Name: "ROOK_OPERATOR_NAMESPACE", Value: os.Getenv(k8sutil.PodNamespaceEnvVar)},
		{Name: "ROOK_CEPH_CLUSTER_CRD_VERSION", Value: rookcephv1.Version},
		{Name: "ROOK_CEPH_CLUSTER_CRD_NAME", Value: c.clusterInfo.Name},
		k8sutil.PodIPEnvVar(podIPEnvVar),
	}
}