/
deploy.go
575 lines (499 loc) · 17.2 KB
/
deploy.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
package launchpad
import (
"context"
"encoding/base64"
"fmt"
"os"
"path/filepath"
"reflect"
"strings"
gotime "time"
"github.com/imdario/mergo"
"github.com/pkg/errors"
"github.com/sethvargo/go-password/password"
"go.jetpack.io/launchpad/goutil"
"go.jetpack.io/launchpad/goutil/errorutil"
"go.jetpack.io/launchpad/padcli/hook"
"go.jetpack.io/launchpad/padcli/jetconfig"
"go.jetpack.io/launchpad/pkg/buildstamp"
"go.jetpack.io/launchpad/pkg/jetlog"
"go.jetpack.io/launchpad/proto/api"
"golang.org/x/sync/errgroup"
"helm.sh/helm/v3/pkg/cli"
"helm.sh/helm/v3/pkg/release"
"helm.sh/helm/v3/pkg/time"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
)
const (
	// chartRepoName is the helm repository that hosts the Jetpack charts.
	chartRepoName = "jetpack-chart-repo"
	// AppChartName is the chart used to deploy the user's application.
	AppChartName = "app"
	// RuntimeChartName is the chart for the shared Jetpack runtime; it is also
	// used as that chart's release name, instance name, and secret name.
	RuntimeChartName = "jetpack-runtime"
	// ApiKeySecretName is the key under which the api key is stored in the
	// runtime's k8s secret data.
	ApiKeySecretName = "api-key-secret"
)
// Chart versions are pinned to this binary's build stamp so deploys always use
// the charts published alongside the CLI.
var appChartVersion = buildstamp.StableDockerTag
var runtimeChartVersion = buildstamp.StableDockerTag
// DeployOptions carries all configuration needed to deploy a user's app (and,
// optionally, the jetpack runtime) to a kubernetes cluster via helm.
type DeployOptions struct {
	App            *HelmOptions
	Environment    string // api.Environment
	ExternalCharts []*ChartConfig
	IsLocalCluster bool
	// We should remove this jetconfig dependency. Pass in an interface. This will be easier to design
	// once we have a few service types implemented, and we understand the concrete
	// requirements. Leaving jetconfig in for now.
	JetCfg          *jetconfig.Config
	KubeContext     string
	LifecycleHook   hook.LifecycleHook
	Namespace       string
	CreateNamespace bool
	// RemoteEnvVars are injected into the app chart as k8s secrets
	// (base64-encoded during plan construction).
	RemoteEnvVars map[string]string
	// Runtime holds helm options for the jetpack runtime chart; when nil the
	// runtime chart is not installed.
	Runtime *HelmOptions
	// SecretFilePaths are files whose contents are mounted into the app as
	// secret files (keyed by base filename).
	SecretFilePaths             []string
	ReinstallOnHelmUpgradeError bool
}
// ChartConfig describes a single helm chart installation: where the chart
// comes from, where it is installed, and the values it is installed with.
type ChartConfig struct {
	Repo      string
	Name      string
	Namespace string
	Release   string // unique identifier for installation (can be same or different from display name)
	Timeout   gotime.Duration
	// Wait makes helm block until the release's resources are ready.
	Wait          bool
	chartLocation string // optional path to local chart
	chartVersion  string
	instanceName  string // resources will inherit this name
	values        map[string]any
}
// HumanName returns the name to display to users for this chart: the instance
// name when one is set, otherwise the chart name itself.
func (c *ChartConfig) HumanName() string {
	if c.instanceName != "" {
		return c.instanceName
	}
	return c.Name
}
// DeployPlan is the fully-resolved set of charts and helm settings that a
// single deploy will apply.
type DeployPlan struct {
	DeployOptions *DeployOptions
	// appChartConfig installs the user's application chart.
	appChartConfig *ChartConfig
	// runtimeChartConfig is nil when the runtime chart is not requested or its
	// installed release is already current.
	runtimeChartConfig *ChartConfig
	// helmDriver is the helm storage backend (from HELM_DRIVER); empty means
	// helm's default.
	helmDriver string
}
// Charts returns the plan's chart configs in install order: the runtime chart
// (when present) must be applied before the app chart.
func (dp *DeployPlan) Charts() []*ChartConfig {
	// Order matters
	ordered := make([]*ChartConfig, 0, 2)
	for _, cc := range []*ChartConfig{dp.runtimeChartConfig, dp.appChartConfig} {
		if cc != nil {
			ordered = append(ordered, cc)
		}
	}
	return ordered
}
// DeployOutput summarizes the result of a completed deploy.
type DeployOutput struct {
	Duration     gotime.Duration
	InstanceName string
	Namespace    string
	Releases     map[string]*release.Release // keyed by unique chart name
}
// AppPort returns the app's pod port, resolved in priority order:
//  1. a user-provided "podPort" in the release's config,
//  2. the chart's default values.yaml,
//  3. the compiled-in default.
//
// Values decoded from YAML/JSON may arrive as int, int64, or float64 depending
// on the decoder, so we accept any numeric form rather than panicking on a
// failed type assertion (the previous `port.(int)` would panic on float64).
func (do *DeployOutput) AppPort() int {
	rel := do.Releases[AppChartName]
	if rel == nil {
		// No app release recorded; fall back to the expected default.
		return defaultPodPort
	}
	// Get the value set by the user, if any.
	if port, ok := asPort(rel.Config["podPort"]); ok {
		return port
	}
	// Read the value from the Chart's default values.yaml file.
	// This is more correct than the const used below.
	if rel.Chart != nil {
		if port, ok := asPort(rel.Chart.Values["podPort"]); ok {
			return port
		}
	}
	// All else failing, fallback to the expected default value.
	return defaultPodPort
}

// asPort converts a decoded helm value to an int port. Truncating a float
// (5.4 becomes 5) doesn't matter for port values, which are whole numbers.
func asPort(v any) (int, bool) {
	switch n := v.(type) {
	case int:
		return n, true
	case int64:
		return int(n), true
	case float64:
		return int(n), true
	}
	return 0, false
}
// SetDuration records how long the deploy took. It is safe to call on a nil
// receiver, in which case it does nothing.
func (do *DeployOutput) SetDuration(d gotime.Duration) {
	if do == nil {
		return
	}
	do.Duration = d
}
// deploy builds a DeployPlan from opts, validates it, executes it, and returns
// details about the helm releases that were installed.
func (p *Pad) deploy(
	ctx context.Context,
	opts *DeployOptions,
) (*DeployOutput, error) {
	plan, err := p.makeDeployPlan(ctx, opts)
	if err != nil {
		return nil, errors.Wrap(err, "failed to make deploy plan")
	}
	if err := validateDeployPlan(plan); err != nil {
		return nil, errors.Wrap(err, "failed to validate deploy plan")
	}
	releases, err := executeDeployPlan(ctx, plan)
	if err != nil {
		return nil, errors.Wrapf(err, "failed to execute deploy plan")
	}
	out := &DeployOutput{
		InstanceName: plan.appChartConfig.instanceName,
		Namespace:    plan.appChartConfig.Namespace,
		Releases:     releases,
	}
	return out, nil
}
// makeDeployPlan assembles the helm values and chart configs for a deploy:
// always the user's app chart, plus the jetpack runtime chart when
// opts.Runtime is set AND the installed runtime release is missing or stale.
// NOTE: the ordering here is load-bearing — runtime secrets (redis password,
// api key) are injected only AFTER the is-current check; see the WARNING
// comments below before adding values.
func (p *Pad) makeDeployPlan(
	ctx context.Context,
	opts *DeployOptions,
) (*DeployPlan, error) {
	// Remote env vars become k8s secret data, which must be base64-encoded.
	envVars := map[string]string{}
	for name, value := range opts.RemoteEnvVars {
		envVars[name] = base64.StdEncoding.EncodeToString([]byte(value))
	}
	// if secrets are already set in helmOptions from env-override flag
	// then merge them with secrets from parameter store with priority on env-override values
	if _, ok := opts.App.Values["secrets"]; ok {
		err := mergo.Merge(&envVars, opts.App.Values["secrets"], mergo.WithOverride)
		if err != nil {
			return nil, errors.Wrap(err, "unable to merge .env file values with jetpack env values")
		}
	}
	secretsToMountAsFiles, err := loadSecretFiles(opts.SecretFilePaths)
	if err != nil {
		return nil, errors.Wrapf(err, "failed to load secret data from files: %v", opts.SecretFilePaths)
	}
	ttlSecondsAfterFinished := 86400 // 24 hours
	if strings.EqualFold(opts.Environment, api.Environment_DEV.String()) {
		ttlSecondsAfterFinished = 600 // 10 minutes, if dev
	}
	// Any value that is defaulted in helm/app/values.yaml should probably
	// have strutil.NilIfEmpty() applied here. Otherwise, passing an empty string
	// will remove the default.
	appValues := goutil.FilterStringKeyMap(map[string]any{
		"image": opts.App.Values["image"],
		"jetpack": map[string]any{
			"instanceName": opts.App.InstanceName,
			"environment":  opts.Environment,
			"sdkBinPath":   nil,
		},
		"serviceAccount": map[string]any{
			"annotations": map[string]any{
				"eks.amazonaws.com/role-arn": nil,
			},
		},
		"secrets":               envVars, // store envVars using k8s secrets
		"secretsToMountAsFiles": secretsToMountAsFiles,
		"jobs": map[string]any{
			"ttlSecondsAfterFinished": ttlSecondsAfterFinished,
		},
	})
	// User-supplied values are layered on top of the computed defaults above.
	if err := mergo.Merge(&appValues, opts.App.Values, mergo.WithAppendSlice); err != nil {
		return nil, errors.Wrap(err, "unable to merge value maps")
	}
	helmDriver := os.Getenv("HELM_DRIVER")
	plan := &DeployPlan{
		DeployOptions: opts,
		helmDriver:    helmDriver, // empty is fine.
	}
	// chart config for user app
	plan.appChartConfig = &ChartConfig{
		chartLocation: opts.App.ChartLocation,
		Name:          AppChartName,
		chartVersion:  appChartVersion,
		instanceName:  opts.App.InstanceName,
		Release:       opts.App.ReleaseName,
		Namespace:     opts.Namespace,
		values:        appValues,
		Wait:          true,
		Timeout:       goutil.Coalesce(opts.App.Timeout, defaultHelmTimeout),
	}
	if opts.Runtime == nil {
		// No need to install runtime chart.
		return plan, nil
	}
	runtimeValues := map[string]any{
		"image": map[string]any{}, // pre-create for convenience
		"redis": map[string]any{
			// "password": this is set later only if needed
			// See https://github.com/bitnami/charts/tree/master/bitnami/redis#cluster-topologies
			// for possible cluster topologies. For now using standalone for simplicity
			// but this can be changed to replication if needed
			"architecture": "standalone",
			"auth": map[string]any{
				// This felt more secure than hardcoding a password here.
				// When I tried this without using a file (using env instead)
				// it was not working. Not sure if it was some sort of race condition
				// with secret creation or something. Once I switched to using password
				// file it worked as expected.
				"existingSecret":            RuntimeChartName,
				"existingSecretPasswordKey": "redis-pass",
				"usePasswordFiles":          true,
			},
			"master": map[string]any{
				"configuration": "notify-keyspace-events K$z",
				"persistence": map[string]any{
					"enabled": opts.IsLocalCluster,
				},
			},
		},
		"jetpack": map[string]any{
			// This is set later, only if it is missing. This prevents us from
			// overriding a pre-existing api-key's secret value.
			// "apiKeySecret": "",
		},
	}
	buildstmp := buildstamp.Get()
	if buildstmp.IsDevBinary() {
		// Dev defaults to latest because the actual version might not exist.
		// In practice, this means in development the runtime might not update automatically.
		// use --helm.runtime.set image.tag=[tag] to override
		// Question: Should this go in cmd package instead?
		runtimeValues["image"].(map[string]any)["tag"] = "latest"
	}
	if err := mergo.Merge(&runtimeValues, opts.Runtime.Values, mergo.WithAppendSlice, mergo.WithOverride); err != nil {
		return nil, errors.Wrap(err, "unable to merge value maps")
	}
	runtimeChartConfig := &ChartConfig{
		chartLocation: opts.Runtime.ChartLocation,
		Name:          RuntimeChartName,
		chartVersion:  runtimeChartVersion,
		Release:       RuntimeChartName,  // Not a mistake, name and install name are the same
		instanceName:  RuntimeChartName,  // Not a mistake, name and instance name are the same
		Namespace:     opts.Namespace,
		values:        runtimeValues,
		Wait:          true,
		Timeout:       goutil.Coalesce(opts.Runtime.Timeout, defaultHelmTimeout),
	}
	// use the specified kube-context name, if any
	settings := newSettings(plan.DeployOptions.KubeContext)
	runtimeIsCurrent := func() bool {
		isInstalled, err := chartIsInstalledAndCurrent(
			ctx,
			helmDriver,
			runtimeChartConfig,
			settings,
			// This is a bit ugly and fragile. Any values added after checking if
			// runtime is installed need to be filtered out so that
			// chartIsInstalledAndCurrent can compare current values to previous
			// release. Benefit of adding the values after this check are
			// a) performance - no need to query k8s
			// b) security - no need for secrets to be fetched
			[]valueKeyPath{
				[]string{"redis", "password"},
				[]string{"jetpack", "apiKeySecret"},
			},
		)
		// Errors here are deliberately treated as "not current" so that the
		// deploy proceeds with a (re)install rather than failing the plan.
		return err == nil && isInstalled
	}
	if runtimeIsCurrent() {
		jetlog.Logger(ctx).IndentedPrintf(
			"\nSkipping upgrade of %s because there are no changes\n",
			runtimeChartConfig.Release,
		)
	} else {
		secretData, err := GetRuntimeSecretData(ctx, opts.KubeContext, opts.Namespace)
		if err != nil {
			return nil, errors.WithStack(err)
		}
		redisPass, err := getOrCreateRedisPass(secretData)
		if err != nil {
			return nil, errors.WithStack(err)
		}
		// WARNING: See comment in runtimeIsCurrent before adding more values here
		runtimeChartConfig.values["redis"].(map[string]any)["password"] = redisPass
		if apiKey, ok := secretData[ApiKeySecretName]; ok {
			runtimeChartConfig.values["jetpack"].(map[string]any)["apiKeySecret"] =
				base64.StdEncoding.EncodeToString(apiKey)
		}
		plan.runtimeChartConfig = runtimeChartConfig
		// END OF WARNING
	}
	return plan, nil
}
// validateDeployPlan checks every chart in the plan and returns the first
// validation failure, or nil when all charts are installable.
func validateDeployPlan(dp *DeployPlan) error {
	charts := dp.Charts()
	for i := range charts {
		if err := charts[i].validate(); err != nil {
			return errors.Wrap(err, "Deploy plan failed to validate")
		}
	}
	// TODO ensure that custom-namespace is not set if we are using the prod-trial cluster
	return nil
}
// validate ensures the chart config carries every field required to perform
// an installation, returning errInvalidChartConfig (wrapped) when one is missing.
func (cc *ChartConfig) validate() error {
	required := []struct {
		value string
		msg   string
	}{
		{cc.Name, "Chart Config is missing chart name"},
		{cc.Release, "Chart Config is missing install name"},
		{cc.instanceName, "Chart Config is missing instance name"},
		{cc.Namespace, "Chart Config is missing a namespace"},
	}
	for _, field := range required {
		if field.value == "" {
			return errors.Wrap(errInvalidChartConfig, field.msg)
		}
	}
	return nil
}
// executeDeployPlan applies the plan's helm charts while concurrently watching
// the cluster for failing containers. If a container crashes during the
// rollout (e.g. a missing python dependency), the watcher's error cancels the
// helm goroutine via the shared errgroup context; conversely, when helm
// finishes it returns a sentinel error to cancel the watcher.
// Returns the applied releases keyed by chart name.
func executeDeployPlan(
	ctx context.Context,
	dp *DeployPlan,
) (map[string]*release.Release, error) {
	errGroup, errGroupCTX := errgroup.WithContext(ctx)
	// Watch for errors in the deployed containers to see if we need to cancel the deploy.
	// This can happen (for example) if a python-dependency has not been added to requirements.txt
	errGroup.Go(func() error {
		return errors.WithStack(watchForContainerErrors(errGroupCTX, dp))
	})
	var errFinishedApplyHelm = errors.New("Finished applying helm charts")
	// releases is written by exactly one goroutine and read only after Wait(),
	// so no additional synchronization is needed.
	var releases map[string]*release.Release
	// Apply the Helm charts for the DeployPlan
	errGroup.Go(func() error {
		rs, err := applyHelmCharts(errGroupCTX, dp)
		if err != nil {
			return errors.WithStack(err)
		}
		releases = rs
		// We return this as a whitelisted error so that the errGroup will cancel the other
		// goroutine that watches for container errors
		return errFinishedApplyHelm
	})
	err := errGroup.Wait()
	if errors.Is(err, errFinishedApplyHelm) {
		// This is our whitelisted error used for errGroup goroutines to terminate cleanly
		// so we can clear the error at this point.
		err = nil
	}
	return releases, errors.Wrap(err, "failed to apply helm charts")
}
// loadFileDataBase64 reads the file at path and returns its contents encoded
// as standard base64. An empty path yields an empty string and no error.
func loadFileDataBase64(path string) (string, error) {
	if path == "" {
		return "", nil
	}
	raw, err := os.ReadFile(path)
	if err != nil {
		return "", errorutil.CombinedError(err, errInvalidFile)
	}
	return base64.StdEncoding.EncodeToString(raw), nil
}
// loadSecretFiles reads each file in paths and returns a map from base
// filename to base64-encoded contents. Empty path entries are skipped. Two
// paths sharing the same base filename is an error, since both would collide
// on the same key. On error the returned map is always nil (previously one
// error path returned a partially-populated map, which callers must not use).
func loadSecretFiles(paths []string) (map[string]string, error) {
	// The secret file map has filename as key and base64 content as value
	secretsToMountAsFiles := map[string]string{}
	for _, path := range paths {
		if path == "" {
			continue
		}
		filename := filepath.Base(path)
		if _, ok := secretsToMountAsFiles[filename]; ok {
			return nil, errors.WithStack(errors.Errorf("conflicting secret file names : supplied secret files paths with identical name %s: %s", filename, path))
		}
		encodedData, err := loadFileDataBase64(path)
		if err != nil {
			// Return nil rather than a partial map: the value is meaningless
			// alongside a non-nil error.
			return nil, errors.Wrapf(err, "failed to load secret data from file %s", path)
		}
		secretsToMountAsFiles[filename] = encodedData
	}
	return secretsToMountAsFiles, nil
}
// valueKeyPath is a path of nested map keys identifying one value inside a
// helm values map, e.g. {"redis", "password"}.
type valueKeyPath []string
// chartIsInstalledAndCurrent returns true if all these conditions are met:
// 1. An existing and active release of this chart already exists.
// 2. The chart version used by the release is the same as that in cc.
// 3. The user-provided values used by the release are the same as those in cc.
// 4. There were no errors in fetching helm data to validate the prior conditions.
// 5. The runtime was last deployed over 24-hours ago. This is a short-cut we're taking
// to allow us to pin the runtime version to a fixed number and not risk having users
// run stale runtimes for too long. Later, we should actually bump runtime versions
// on both the server and the CLI side, and remove this check.
//
// ignoredCurrentValues lists value paths that are injected into cc only after
// this check runs (secrets), and so must be stripped from the installed
// release's values before comparing.
func chartIsInstalledAndCurrent(
	ctx context.Context,
	helmDriver string,
	cc *ChartConfig,
	settings *cli.EnvSettings,
	ignoredCurrentValues []valueKeyPath,
) (bool, error) {
	currentRelease, err := getRelease(ctx, helmDriver, cc, settings)
	if err != nil {
		return false, errors.WithStack(err)
	}
	// Compare against cc's own pinned version rather than the package-level
	// runtimeChartVersion: they are currently the same value, but this function
	// takes an arbitrary ChartConfig and must stay correct for any chart.
	if currentRelease.Info.LastDeployed.Add(24*gotime.Hour).Before(time.Now()) ||
		currentRelease.Info.Status != release.StatusDeployed ||
		currentRelease.Chart.Metadata.Version != cc.chartVersion {
		return false, nil
	}
	currentValues, err := getValues(ctx, helmDriver, cc, settings)
	if err != nil {
		return false, errors.WithStack(err)
	}
	// Strip late-injected secret values so the comparison is apples-to-apples.
	for _, path := range ignoredCurrentValues {
		goutil.DigDelete(currentValues, path...)
	}
	return reflect.DeepEqual(cc.values, currentValues), nil
}
// getOrCreateRedisPass returns the base64-encoded redis password: the one
// already stored in secretData when present, otherwise a freshly generated one.
func getOrCreateRedisPass(secretData map[string][]byte) (string, error) {
	// If the pass is present, the secret exists and redis was previously
	// installed — reuse it.
	if existing, ok := secretData["redis-pass"]; ok {
		return base64.StdEncoding.EncodeToString(existing), nil
	}
	// If users are accessing redis in a URL format, symbols can be illegal
	// characters, so generate a password without any.
	generated, err := password.Generate(10, 4, 0, false /*noUpper*/, false /*allowRepeat*/)
	if err != nil {
		return "", errors.WithStack(err)
	}
	return base64.StdEncoding.EncodeToString([]byte(generated)), nil
}
// GetRuntimeSecretData fetches the k8s secret data belonging to the jetpack
// runtime release in the given namespace and kube context.
func GetRuntimeSecretData(
	ctx context.Context,
	kubeCtx string,
	ns string,
) (map[string][]byte, error) {
	data, err := getSecretData(ctx, kubeCtx, ns, RuntimeChartName)
	if err != nil {
		return nil, errors.WithStack(err)
	}
	return data, nil
}
// watchForContainerErrors watches the pods of the app release revision being
// deployed and returns an error as soon as any container enters a fatal
// waiting state (RunContainerError or CrashLoopBackOff). It returns nil when
// the watch channel closes — e.g. when ctx is cancelled after the helm apply
// finishes.
func watchForContainerErrors(ctx context.Context, dp *DeployPlan) error {
	rc, err := RESTConfigFromDefaults(dp.DeployOptions.KubeContext)
	if err != nil {
		return errors.WithStack(err)
	}
	clientset, err := kubernetes.NewForConfig(rc)
	if err != nil {
		return errors.WithStack(err)
	}
	currentReleases, err := listReleases(ctx, dp.helmDriver, dp.DeployOptions.KubeContext, dp.DeployOptions.Namespace)
	if err != nil {
		return errorutil.CombinedError(err, errUnableToAccessHelmReleases)
	}
	// The pods we care about carry the revision helm is about to create:
	// one past the currently installed revision, or 1 on a first install.
	appRelease := findRelease(currentReleases, dp.DeployOptions.App.ReleaseName)
	revision := 1
	if appRelease != nil {
		revision = appRelease.Version + 1
	}
	watcher, err := clientset.CoreV1().Pods(dp.appChartConfig.Namespace).Watch(ctx, metav1.ListOptions{
		LabelSelector: fmt.Sprintf(
			"app.kubernetes.io/name=%s,app.kubernetes.io/instance=%s,jetpack.io/revision=%d",
			dp.appChartConfig.Name,
			dp.appChartConfig.instanceName,
			revision,
		),
		Watch: true,
	})
	if err != nil {
		return errors.WithStack(err)
	}
	// Stop the watch on every return path; without this, returning early on a
	// container error leaks the watch stream.
	defer watcher.Stop()
	for event := range watcher.ResultChan() {
		pod, ok := event.Object.(*corev1.Pod)
		if !ok {
			continue
		}
		for _, containerStatus := range pod.Status.ContainerStatuses {
			waiting := containerStatus.State.Waiting
			if waiting == nil {
				continue
			}
			if waiting.Reason == "RunContainerError" || waiting.Reason == "CrashLoopBackOff" {
				return errorutil.CombinedError(
					ErrPodContainerError,
					errorutil.NewUserErrorf(
						"[ERROR]: Application failed to start: %s\n",
						waiting.Message,
					),
				)
			}
		}
	}
	return nil
}