diff --git a/pkg/apis/deployment/v1alpha/plan.go b/pkg/apis/deployment/v1alpha/plan.go index 27793d45d..3d04c59a8 100644 --- a/pkg/apis/deployment/v1alpha/plan.go +++ b/pkg/apis/deployment/v1alpha/plan.go @@ -45,6 +45,8 @@ const ( ActionTypeUpgradeMember ActionType = "UpgradeMember" // ActionTypeWaitForMemberUp causes the plan to wait until the member is considered "up". ActionTypeWaitForMemberUp ActionType = "WaitForMemberUp" + // ActionTypeRenewTLSCertificate causes the TLS certificate of a member to be renewed. + ActionTypeRenewTLSCertificate ActionType = "RenewTLSCertificate" ) // Action represents a single action to be taken to update a deployment. diff --git a/pkg/deployment/context_impl.go b/pkg/deployment/context_impl.go index a417570db..ce6c9d64a 100644 --- a/pkg/deployment/context_impl.go +++ b/pkg/deployment/context_impl.go @@ -194,3 +194,26 @@ func (d *Deployment) GetOwnedPods() ([]v1.Pod, error) { } return myPods, nil } + +// GetTLSKeyfile returns the keyfile encoded TLS certificate+key for +// the given member. +func (d *Deployment) GetTLSKeyfile(group api.ServerGroup, member api.MemberStatus) (string, error) { + secretName := k8sutil.CreateTLSKeyfileSecretName(d.apiObject.GetName(), group.AsRole(), member.ID) + ns := d.apiObject.GetNamespace() + result, err := k8sutil.GetTLSKeyfileSecret(d.deps.KubeCli.CoreV1(), secretName, ns) + if err != nil { + return "", maskAny(err) + } + return result, nil +} + +// DeleteTLSKeyfile removes the Secret containing the TLS keyfile for the given member. +// If the secret does not exist, the error is ignored. +func (d *Deployment) DeleteTLSKeyfile(group api.ServerGroup, member api.MemberStatus) error { + secretName := k8sutil.CreateTLSKeyfileSecretName(d.apiObject.GetName(), group.AsRole(), member.ID) + ns := d.apiObject.GetNamespace() + if err := d.deps.KubeCli.CoreV1().Secrets(ns).Delete(secretName, &metav1.DeleteOptions{}); err != nil && !k8sutil.IsNotFound(err) { + return maskAny(err) + } + return nil +} diff --git a/pkg/deployment/reconcile/action_context.go b/pkg/deployment/reconcile/action_context.go index f1ae06883..03bfde1e0 100644 --- a/pkg/deployment/reconcile/action_context.go +++ b/pkg/deployment/reconcile/action_context.go @@ -64,6 +64,9 @@ type ActionContext interface { // DeletePvc deletes a persistent volume claim with given name in the namespace // of the deployment. If the pvc does not exist, the error is ignored. DeletePvc(pvcName string) error + // DeleteTLSKeyfile removes the Secret containing the TLS keyfile for the given member. + // If the secret does not exist, the error is ignored. + DeleteTLSKeyfile(group api.ServerGroup, member api.MemberStatus) error } // newActionContext creates a new ActionContext implementation. @@ -181,3 +184,12 @@ func (ac *actionContext) DeletePvc(pvcName string) error { } return nil } + +// DeleteTLSKeyfile removes the Secret containing the TLS keyfile for the given member. +// If the secret does not exist, the error is ignored. +func (ac *actionContext) DeleteTLSKeyfile(group api.ServerGroup, member api.MemberStatus) error { + if err := ac.context.DeleteTLSKeyfile(group, member); err != nil { + return maskAny(err) + } + return nil +} diff --git a/pkg/deployment/reconcile/action_renew_tls_certificate.go b/pkg/deployment/reconcile/action_renew_tls_certificate.go new file mode 100644 index 000000000..284394a0f --- /dev/null +++ b/pkg/deployment/reconcile/action_renew_tls_certificate.go @@ -0,0 +1,71 @@ +// +// DISCLAIMER +// +// Copyright 2018 ArangoDB GmbH, Cologne, Germany +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright holder is ArangoDB GmbH, Cologne, Germany +// +// Author Ewout Prangsma +// + +package reconcile + +import ( + "context" + + api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha" + "github.com/rs/zerolog" +) + +// NewRenewTLSCertificateAction creates a new Action that implements the given +// planned RenewTLSCertificate action. +func NewRenewTLSCertificateAction(log zerolog.Logger, action api.Action, actionCtx ActionContext) Action { + return &renewTLSCertificateAction{ + log: log, + action: action, + actionCtx: actionCtx, + } +} + +// renewTLSCertificateAction implements a RenewTLSCertificate action. +type renewTLSCertificateAction struct { + log zerolog.Logger + action api.Action + actionCtx ActionContext +} + +// Start performs the start of the action. +// Returns true if the action is completely finished, false in case +// the start time needs to be recorded and a ready condition needs to be checked. +func (a *renewTLSCertificateAction) Start(ctx context.Context) (bool, error) { + log := a.log + group := a.action.Group + m, ok := a.actionCtx.GetMemberStatusByID(a.action.MemberID) + if !ok { + log.Error().Msg("No such member") + } + // Just delete the secret. + // It will be re-created when the member restarts. + if err := a.actionCtx.DeleteTLSKeyfile(group, m); err != nil { + return false, maskAny(err) + } + return false, nil +} + +// CheckProgress checks the progress of the action. +// Returns true if the action is completely finished, false otherwise. +func (a *renewTLSCertificateAction) CheckProgress(ctx context.Context) (bool, error) { + return true, nil +} diff --git a/pkg/deployment/reconcile/context.go b/pkg/deployment/reconcile/context.go index 3d95784c1..fa2ca0795 100644 --- a/pkg/deployment/reconcile/context.go +++ b/pkg/deployment/reconcile/context.go @@ -63,4 +63,10 @@ type Context interface { DeletePvc(pvcName string) error // GetOwnedPods returns a list of all pods owned by the deployment. GetOwnedPods() ([]v1.Pod, error) + // GetTLSKeyfile returns the keyfile encoded TLS certificate+key for + // the given member. + GetTLSKeyfile(group api.ServerGroup, member api.MemberStatus) (string, error) + // DeleteTLSKeyfile removes the Secret containing the TLS keyfile for the given member. + // If the secret does not exist, the error is ignored. + DeleteTLSKeyfile(group api.ServerGroup, member api.MemberStatus) error } diff --git a/pkg/deployment/reconcile/plan_builder.go b/pkg/deployment/reconcile/plan_builder.go index 098b275bd..29cfd22e7 100644 --- a/pkg/deployment/reconcile/plan_builder.go +++ b/pkg/deployment/reconcile/plan_builder.go @@ -23,11 +23,16 @@ package reconcile import ( - api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha" + "crypto/x509" + "encoding/pem" + "time" + "github.com/rs/zerolog" "github.com/rs/zerolog/log" "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha" ) // upgradeDecision is the result of an upgrade check. @@ -52,7 +57,7 @@ func (d *Reconciler) CreatePlan() error { apiObject := d.context.GetAPIObject() spec := d.context.GetSpec() status := d.context.GetStatus() - newPlan, changed := createPlan(d.log, apiObject, status.Plan, spec, status, pods) + newPlan, changed := createPlan(d.log, apiObject, status.Plan, spec, status, pods, d.context.GetTLSKeyfile) // If not change, we're done if !changed { @@ -76,7 +81,8 @@ func (d *Reconciler) CreatePlan() error { // Otherwise the new plan is returned with a boolean true. func createPlan(log zerolog.Logger, apiObject metav1.Object, currentPlan api.Plan, spec api.DeploymentSpec, - status api.DeploymentStatus, pods []v1.Pod) (api.Plan, bool) { + status api.DeploymentStatus, pods []v1.Pod, + getTLSKeyfile func(group api.ServerGroup, member api.MemberStatus) (string, error)) (api.Plan, bool) { if len(currentPlan) > 0 { // Plan already exists, complete that first return currentPlan, false @@ -158,6 +164,39 @@ func createPlan(log zerolog.Logger, apiObject metav1.Object, }) } + // Check for the need to rotate TLS certificate of a members + if len(plan) == 0 && spec.TLS.IsSecure() { + status.Members.ForeachServerGroup(func(group api.ServerGroup, members *api.MemberStatusList) error { + for _, m := range *members { + if len(plan) > 0 { + // Only 1 change at a time + continue + } + if m.Phase != api.MemberPhaseCreated { + // Only make changes when phase is created + continue + } + // Load keyfile + keyfile, err := getTLSKeyfile(group, m) + if err != nil { + log.Warn().Err(err). + Str("role", group.AsRole()). + Str("id", m.ID). + Msg("Failed to get TLS secret") + continue + } + renewalNeeded := tlsKeyfileNeedsRenewal(log, keyfile) + if renewalNeeded { + plan = append(append(plan, + api.NewAction(api.ActionTypeRenewTLSCertificate, group, m.ID)), + createRotateMemberPlan(log, m, group, "TLS certificate renewal")..., + ) + } + } + return nil + }) + } + // Return plan return plan, true } @@ -233,6 +272,44 @@ func podNeedsRotation(p v1.Pod, apiObject metav1.Object, spec api.DeploymentSpec return false, "" } +// tlsKeyfileNeedsRenewal decides if the certificate in the given keyfile +// should be renewed. +func tlsKeyfileNeedsRenewal(log zerolog.Logger, keyfile string) bool { + raw := []byte(keyfile) + for { + var derBlock *pem.Block + derBlock, raw = pem.Decode(raw) + if derBlock == nil { + break + } + if derBlock.Type == "CERTIFICATE" { + cert, err := x509.ParseCertificate(derBlock.Bytes) + if err != nil { + // We do not understand the certificate, let's renew it + log.Warn().Err(err).Msg("Failed to parse x509 certificate. Renewing it") + return true + } + if cert.IsCA { + // Only look at the server certificate, not CA or intermediate + continue + } + // Check expiration date. Renewal at 2/3 of lifetime. + ttl := cert.NotAfter.Sub(cert.NotBefore) + expirationDate := cert.NotBefore.Add((ttl / 3) * 2) + if expirationDate.Before(time.Now()) { + // We should renew now + log.Debug(). + Str("not-before", cert.NotBefore.String()). + Str("not-after", cert.NotAfter.String()). + Str("expiration-date", expirationDate.String()). + Msg("TLS certificate renewal needed") + return true + } + } + } + return false +} + // createScalePlan creates a scaling plan for a single server group func createScalePlan(log zerolog.Logger, members api.MemberStatusList, group api.ServerGroup, count int) api.Plan { var plan api.Plan diff --git a/pkg/deployment/reconcile/plan_builder_test.go b/pkg/deployment/reconcile/plan_builder_test.go index 00016ef11..20a7640bf 100644 --- a/pkg/deployment/reconcile/plan_builder_test.go +++ b/pkg/deployment/reconcile/plan_builder_test.go @@ -23,6 +23,7 @@ package reconcile import ( + "fmt" "testing" "github.com/rs/zerolog" @@ -36,6 +37,9 @@ import ( // TestCreatePlanSingleScale creates a `single` deployment to test the creating of scaling plan. func TestCreatePlanSingleScale(t *testing.T) { + getTLSKeyfile := func(group api.ServerGroup, member api.MemberStatus) (string, error) { + return "", maskAny(fmt.Errorf("Not implemented")) + } log := zerolog.Nop() spec := api.DeploymentSpec{ Mode: api.NewMode(api.DeploymentModeSingle), @@ -51,7 +55,7 @@ func TestCreatePlanSingleScale(t *testing.T) { // Test with empty status var status api.DeploymentStatus - newPlan, changed := createPlan(log, depl, nil, spec, status, nil) + newPlan, changed := createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile) assert.True(t, changed) assert.Len(t, newPlan, 0) // Single mode does not scale @@ -62,7 +66,7 @@ func TestCreatePlanSingleScale(t *testing.T) { PodName: "something", }, } - newPlan, changed = createPlan(log, depl, nil, spec, status, nil) + newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile) assert.True(t, changed) assert.Len(t, newPlan, 0) // Single mode does not scale @@ -77,13 +81,16 @@ func TestCreatePlanSingleScale(t *testing.T) { PodName: "something1", }, } - newPlan, changed = createPlan(log, depl, nil, spec, status, nil) + newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile) assert.True(t, changed) assert.Len(t, newPlan, 0) // Single mode does not scale } // TestCreatePlanResilientSingleScale creates a `resilientsingle` deployment to test the creating of scaling plan. func TestCreatePlanResilientSingleScale(t *testing.T) { + getTLSKeyfile := func(group api.ServerGroup, member api.MemberStatus) (string, error) { + return "", maskAny(fmt.Errorf("Not implemented")) + } log := zerolog.Nop() spec := api.DeploymentSpec{ Mode: api.NewMode(api.DeploymentModeResilientSingle), @@ -100,7 +107,7 @@ func TestCreatePlanResilientSingleScale(t *testing.T) { // Test with empty status var status api.DeploymentStatus - newPlan, changed := createPlan(log, depl, nil, spec, status, nil) + newPlan, changed := createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile) assert.True(t, changed) require.Len(t, newPlan, 2) assert.Equal(t, api.ActionTypeAddMember, newPlan[0].Type) @@ -113,7 +120,7 @@ func TestCreatePlanResilientSingleScale(t *testing.T) { PodName: "something", }, } - newPlan, changed = createPlan(log, depl, nil, spec, status, nil) + newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile) assert.True(t, changed) require.Len(t, newPlan, 1) assert.Equal(t, api.ActionTypeAddMember, newPlan[0].Type) @@ -138,7 +145,7 @@ func TestCreatePlanResilientSingleScale(t *testing.T) { PodName: "something4", }, } - newPlan, changed = createPlan(log, depl, nil, spec, status, nil) + newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile) assert.True(t, changed) require.Len(t, newPlan, 2) // Note: Downscaling is only down 1 at a time assert.Equal(t, api.ActionTypeShutdownMember, newPlan[0].Type) @@ -149,6 +156,9 @@ func TestCreatePlanResilientSingleScale(t *testing.T) { // TestCreatePlanClusterScale creates a `cluster` deployment to test the creating of scaling plan. func TestCreatePlanClusterScale(t *testing.T) { + getTLSKeyfile := func(group api.ServerGroup, member api.MemberStatus) (string, error) { + return "", maskAny(fmt.Errorf("Not implemented")) + } log := zerolog.Nop() spec := api.DeploymentSpec{ Mode: api.NewMode(api.DeploymentModeCluster), @@ -164,7 +174,7 @@ func TestCreatePlanClusterScale(t *testing.T) { // Test with empty status var status api.DeploymentStatus - newPlan, changed := createPlan(log, depl, nil, spec, status, nil) + newPlan, changed := createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile) assert.True(t, changed) require.Len(t, newPlan, 6) // Adding 3 dbservers & 3 coordinators (note: agents do not scale now) assert.Equal(t, api.ActionTypeAddMember, newPlan[0].Type) @@ -197,7 +207,7 @@ func TestCreatePlanClusterScale(t *testing.T) { PodName: "coordinator1", }, } - newPlan, changed = createPlan(log, depl, nil, spec, status, nil) + newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile) assert.True(t, changed) require.Len(t, newPlan, 3) assert.Equal(t, api.ActionTypeAddMember, newPlan[0].Type) @@ -234,7 +244,7 @@ func TestCreatePlanClusterScale(t *testing.T) { } spec.DBServers.Count = util.NewInt(1) spec.Coordinators.Count = util.NewInt(1) - newPlan, changed = createPlan(log, depl, nil, spec, status, nil) + newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile) assert.True(t, changed) require.Len(t, newPlan, 5) // Note: Downscaling is done 1 at a time assert.Equal(t, api.ActionTypeCleanOutMember, newPlan[0].Type) diff --git a/pkg/deployment/reconcile/plan_executor.go b/pkg/deployment/reconcile/plan_executor.go index 635c9b89d..ca0291e38 100644 --- a/pkg/deployment/reconcile/plan_executor.go +++ b/pkg/deployment/reconcile/plan_executor.go @@ -134,6 +134,8 @@ func (d *Reconciler) createAction(ctx context.Context, log zerolog.Logger, actio return NewUpgradeMemberAction(log, action, actionCtx) case api.ActionTypeWaitForMemberUp: return NewWaitForMemberUpAction(log, action, actionCtx) + case api.ActionTypeRenewTLSCertificate: + return NewRenewTLSCertificateAction(log, action, actionCtx) default: panic(fmt.Sprintf("Unknown action type '%s'", action.Type)) } diff --git a/pkg/util/k8sutil/secrets.go b/pkg/util/k8sutil/secrets.go index 12d8683d2..7dca11481 100644 --- a/pkg/util/k8sutil/secrets.go +++ b/pkg/util/k8sutil/secrets.go @@ -114,6 +114,22 @@ func CreateCASecret(cli corev1.CoreV1Interface, secretName, namespace string, ce return nil } +// GetTLSKeyfileSecret loads a secret used to store a PEM encoded keyfile +// in the format ArangoDB accepts it for its `--ssl.keyfile` option. +// Returns: keyfile (pem encoded), error +func GetTLSKeyfileSecret(cli corev1.CoreV1Interface, secretName, namespace string) (string, error) { + s, err := cli.Secrets(namespace).Get(secretName, metav1.GetOptions{}) + if err != nil { + return "", maskAny(err) + } + // Load `tls.keyfile` field + keyfile, found := s.Data[constants.SecretTLSKeyfile] + if !found { + return "", maskAny(fmt.Errorf("No '%s' found in secret '%s'", constants.SecretTLSKeyfile, secretName)) + } + return string(keyfile), nil +} + // CreateTLSKeyfileSecret creates a secret used to store a PEM encoded keyfile // in the format ArangoDB accepts it for its `--ssl.keyfile` option. func CreateTLSKeyfileSecret(cli corev1.CoreV1Interface, secretName, namespace string, keyfile string, ownerRef *metav1.OwnerReference) error {