diff --git a/pkg/apis/deployment/v1alpha/plan.go b/pkg/apis/deployment/v1alpha/plan.go index 3d04c59a8..9a86de888 100644 --- a/pkg/apis/deployment/v1alpha/plan.go +++ b/pkg/apis/deployment/v1alpha/plan.go @@ -47,6 +47,8 @@ const ( ActionTypeWaitForMemberUp ActionType = "WaitForMemberUp" // ActionTypeRenewTLSCertificate causes the TLS certificate of a member to be renewed. ActionTypeRenewTLSCertificate ActionType = "RenewTLSCertificate" + // ActionTypeRenewTLSCACertificate causes the TLS CA certificate of the entire deployment to be renewed. + ActionTypeRenewTLSCACertificate ActionType = "RenewTLSCACertificate" ) // Action represents a single action to be taken to update a deployment. diff --git a/pkg/deployment/access_package.go b/pkg/deployment/access_package.go index 499a7bdd9..b9b12af77 100644 --- a/pkg/deployment/access_package.go +++ b/pkg/deployment/access_package.go @@ -108,7 +108,7 @@ func (d *Deployment) ensureAccessPackage(apSecretName string) error { // Fetch client authentication CA clientAuthSecretName := spec.Sync.Authentication.GetClientCASecretName() - clientAuthCert, clientAuthKey, err := k8sutil.GetCASecret(d.deps.KubeCli.CoreV1(), clientAuthSecretName, ns) + clientAuthCert, clientAuthKey, _, err := k8sutil.GetCASecret(d.deps.KubeCli.CoreV1(), clientAuthSecretName, ns, nil) if err != nil { log.Debug().Err(err).Msg("Failed to get client-auth CA secret") return maskAny(err) diff --git a/pkg/deployment/context_impl.go b/pkg/deployment/context_impl.go index f9efb7f06..388328eea 100644 --- a/pkg/deployment/context_impl.go +++ b/pkg/deployment/context_impl.go @@ -237,6 +237,25 @@ func (d *Deployment) CleanupPod(p v1.Pod) error { return nil } +// RemovePodFinalizers removes all the finalizers from the Pod with given name in the namespace +// of the deployment. If the pod does not exist, the error is ignored. +func (d *Deployment) RemovePodFinalizers(podName string) error { + log := d.deps.Log + ns := d.GetNamespace() + kubecli := d.deps.KubeCli + p, err := kubecli.CoreV1().Pods(ns).Get(podName, metav1.GetOptions{}) + if err != nil { + if k8sutil.IsNotFound(err) { + return nil + } + return maskAny(err) + } + if err := k8sutil.RemovePodFinalizers(log, d.deps.KubeCli, p, p.GetFinalizers(), true); err != nil { + return maskAny(err) + } + return nil +} + // DeletePvc deletes a persistent volume claim with given name in the namespace // of the deployment. If the pvc does not exist, the error is ignored. func (d *Deployment) DeletePvc(pvcName string) error { @@ -307,3 +326,26 @@ func (d *Deployment) DeleteTLSKeyfile(group api.ServerGroup, member api.MemberSt } return nil } + +// GetTLSCA returns the TLS CA certificate in the secret with given name. +// Returns: publicKey, privateKey, ownerByDeployment, error +func (d *Deployment) GetTLSCA(secretName string) (string, string, bool, error) { + ns := d.apiObject.GetNamespace() + owner := d.apiObject.AsOwner() + cert, priv, isOwned, err := k8sutil.GetCASecret(d.deps.KubeCli.CoreV1(), secretName, ns, &owner) + if err != nil { + return "", "", false, maskAny(err) + } + return cert, priv, isOwned, nil + +} + +// DeleteSecret removes the Secret with given name. +// If the secret does not exist, the error is ignored. +func (d *Deployment) DeleteSecret(secretName string) error { + ns := d.apiObject.GetNamespace() + if err := d.deps.KubeCli.CoreV1().Secrets(ns).Delete(secretName, &metav1.DeleteOptions{}); err != nil && !k8sutil.IsNotFound(err) { + return maskAny(err) + } + return nil +} diff --git a/pkg/deployment/reconcile/action_context.go b/pkg/deployment/reconcile/action_context.go index c127e24b8..6bcb67bfc 100644 --- a/pkg/deployment/reconcile/action_context.go +++ b/pkg/deployment/reconcile/action_context.go @@ -70,9 +70,14 @@ type ActionContext interface { // DeletePvc deletes a persistent volume claim with given name in the namespace // of the deployment. If the pvc does not exist, the error is ignored. DeletePvc(pvcName string) error + // RemovePodFinalizers removes all the finalizers from the Pod with given name in the namespace + // of the deployment. If the pod does not exist, the error is ignored. + RemovePodFinalizers(podName string) error // DeleteTLSKeyfile removes the Secret containing the TLS keyfile for the given member. // If the secret does not exist, the error is ignored. DeleteTLSKeyfile(group api.ServerGroup, member api.MemberStatus) error + // DeleteTLSCASecret removes the Secret containing the TLS CA certificate. + DeleteTLSCASecret() error } // newActionContext creates a new ActionContext implementation. @@ -212,6 +217,15 @@ func (ac *actionContext) DeletePvc(pvcName string) error { return nil } +// RemovePodFinalizers removes all the finalizers from the Pod with given name in the namespace +// of the deployment. If the pod does not exist, the error is ignored. +func (ac *actionContext) RemovePodFinalizers(podName string) error { + if err := ac.context.RemovePodFinalizers(podName); err != nil { + return maskAny(err) + } + return nil +} + // DeleteTLSKeyfile removes the Secret containing the TLS keyfile for the given member. // If the secret does not exist, the error is ignored. func (ac *actionContext) DeleteTLSKeyfile(group api.ServerGroup, member api.MemberStatus) error { @@ -220,3 +234,28 @@ func (ac *actionContext) DeleteTLSKeyfile(group api.ServerGroup, member api.Memb } return nil } + +// DeleteTLSCASecret removes the Secret containing the TLS CA certificate. +func (ac *actionContext) DeleteTLSCASecret() error { + spec := ac.context.GetSpec().TLS + if !spec.IsSecure() { + return nil + } + secretName := spec.GetCASecretName() + if secretName == "" { + return nil + } + // Remove secret hash, since it is going to change + status, lastVersion := ac.context.GetStatus() + if status.SecretHashes != nil { + status.SecretHashes.TLSCA = "" + if err := ac.context.UpdateStatus(status, lastVersion); err != nil { + return maskAny(err) + } + } + // Do delete the secret + if err := ac.context.DeleteSecret(secretName); err != nil { + return maskAny(err) + } + return nil +} diff --git a/pkg/deployment/reconcile/action_renew_tls_ca_certificate.go b/pkg/deployment/reconcile/action_renew_tls_ca_certificate.go new file mode 100644 index 000000000..d22a7abe5 --- /dev/null +++ b/pkg/deployment/reconcile/action_renew_tls_ca_certificate.go @@ -0,0 +1,71 @@ +// +// DISCLAIMER +// +// Copyright 2018 ArangoDB GmbH, Cologne, Germany +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright holder is ArangoDB GmbH, Cologne, Germany +// +// Author Ewout Prangsma +// + +package reconcile + +import ( + "context" + "time" + + api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha" + "github.com/rs/zerolog" +) + +// NewRenewTLSCACertificateAction creates a new Action that implements the given +// planned RenewTLSCACertificate action. +func NewRenewTLSCACertificateAction(log zerolog.Logger, action api.Action, actionCtx ActionContext) Action { + return &renewTLSCACertificateAction{ + log: log, + action: action, + actionCtx: actionCtx, + } +} + +// renewTLSCACertificateAction implements a RenewTLSCACertificate action. +type renewTLSCACertificateAction struct { + log zerolog.Logger + action api.Action + actionCtx ActionContext +} + +// Start performs the start of the action. +// Returns true if the action is completely finished, false in case +// the start time needs to be recorded and a ready condition needs to be checked. +func (a *renewTLSCACertificateAction) Start(ctx context.Context) (bool, error) { + // Just delete the secret. + // It will be re-created. + if err := a.actionCtx.DeleteTLSCASecret(); err != nil { + return false, maskAny(err) + } + return true, nil +} + +// CheckProgress checks the progress of the action. +// Returns true if the action is completely finished, false otherwise. +func (a *renewTLSCACertificateAction) CheckProgress(ctx context.Context) (bool, bool, error) { + return true, false, nil +} + +// Timeout returns the amount of time after which this action will timeout. +func (a *renewTLSCACertificateAction) Timeout() time.Duration { + return renewTLSCACertificateTimeout +} diff --git a/pkg/deployment/reconcile/action_rotate_member.go b/pkg/deployment/reconcile/action_rotate_member.go index 8b4c59f86..41e83e314 100644 --- a/pkg/deployment/reconcile/action_rotate_member.go +++ b/pkg/deployment/reconcile/action_rotate_member.go @@ -57,6 +57,10 @@ func (a *actionRotateMember) Start(ctx context.Context) (bool, error) { if !ok { log.Error().Msg("No such member") } + // Remove finalizers, so Kubernetes will quickly terminate the pod + if err := a.actionCtx.RemovePodFinalizers(m.PodName); err != nil { + return false, maskAny(err) + } if group.IsArangod() { // Invoke shutdown endpoint c, err := a.actionCtx.GetServerClient(ctx, group, a.action.MemberID) diff --git a/pkg/deployment/reconcile/context.go b/pkg/deployment/reconcile/context.go index 66b9e2b23..0e6cb7681 100644 --- a/pkg/deployment/reconcile/context.go +++ b/pkg/deployment/reconcile/context.go @@ -69,6 +69,9 @@ type Context interface { // DeletePvc deletes a persistent volume claim with given name in the namespace // of the deployment. If the pvc does not exist, the error is ignored. DeletePvc(pvcName string) error + // RemovePodFinalizers removes all the finalizers from the Pod with given name in the namespace + // of the deployment. If the pod does not exist, the error is ignored. + RemovePodFinalizers(podName string) error // GetOwnedPods returns a list of all pods owned by the deployment. GetOwnedPods() ([]v1.Pod, error) // GetTLSKeyfile returns the keyfile encoded TLS certificate+key for @@ -77,4 +80,10 @@ type Context interface { // DeleteTLSKeyfile removes the Secret containing the TLS keyfile for the given member. // If the secret does not exist, the error is ignored. DeleteTLSKeyfile(group api.ServerGroup, member api.MemberStatus) error + // GetTLSCA returns the TLS CA certificate in the secret with given name. + // Returns: publicKey, privateKey, ownerByDeployment, error + GetTLSCA(secretName string) (string, string, bool, error) + // DeleteSecret removes the Secret with given name. + // If the secret does not exist, the error is ignored. + DeleteSecret(secretName string) error } diff --git a/pkg/deployment/reconcile/plan_builder.go b/pkg/deployment/reconcile/plan_builder.go index 4440b173d..bf6430444 100644 --- a/pkg/deployment/reconcile/plan_builder.go +++ b/pkg/deployment/reconcile/plan_builder.go @@ -23,10 +23,6 @@ package reconcile import ( - "crypto/x509" - "encoding/pem" - "time" - "github.com/rs/zerolog" "github.com/rs/zerolog/log" "k8s.io/api/core/v1" @@ -58,7 +54,7 @@ func (d *Reconciler) CreatePlan() error { apiObject := d.context.GetAPIObject() spec := d.context.GetSpec() status, lastVersion := d.context.GetStatus() - newPlan, changed := createPlan(d.log, apiObject, status.Plan, spec, status, pods, d.context.GetTLSKeyfile) + newPlan, changed := createPlan(d.log, apiObject, status.Plan, spec, status, pods, d.context.GetTLSKeyfile, d.context.GetTLSCA) // If not change, we're done if !changed { @@ -83,7 +79,8 @@ func (d *Reconciler) CreatePlan() error { func createPlan(log zerolog.Logger, apiObject metav1.Object, currentPlan api.Plan, spec api.DeploymentSpec, status api.DeploymentStatus, pods []v1.Pod, - getTLSKeyfile func(group api.ServerGroup, member api.MemberStatus) (string, error)) (api.Plan, bool) { + getTLSKeyfile func(group api.ServerGroup, member api.MemberStatus) (string, error), + getTLSCA func(string) (string, string, bool, error)) (api.Plan, bool) { if len(currentPlan) > 0 { // Plan already exists, complete that first return currentPlan, false @@ -178,41 +175,14 @@ func createPlan(log zerolog.Logger, apiObject metav1.Object, }) } + // Check for the need to rotate TLS CA certificate and all members + if len(plan) == 0 { + plan = createRotateTLSCAPlan(log, spec, status, getTLSCA) + } + // Check for the need to rotate TLS certificate of a members - if len(plan) == 0 && spec.TLS.IsSecure() { - status.Members.ForeachServerGroup(func(group api.ServerGroup, members api.MemberStatusList) error { - for _, m := range members { - if len(plan) > 0 { - // Only 1 change at a time - continue - } - if m.Phase != api.MemberPhaseCreated { - // Only make changes when phase is created - continue - } - if group == api.ServerGroupSyncWorkers { - // SyncWorkers have no externally created TLS keyfile - continue - } - // Load keyfile - keyfile, err := getTLSKeyfile(group, m) - if err != nil { - log.Warn().Err(err). - Str("role", group.AsRole()). - Str("id", m.ID). - Msg("Failed to get TLS secret") - continue - } - renewalNeeded := tlsKeyfileNeedsRenewal(log, keyfile) - if renewalNeeded { - plan = append(append(plan, - api.NewAction(api.ActionTypeRenewTLSCertificate, group, m.ID)), - createRotateMemberPlan(log, m, group, "TLS certificate renewal")..., - ) - } - } - return nil - }) + if len(plan) == 0 { + plan = createRotateTLSServerCertificatePlan(log, spec, status, getTLSKeyfile) } // Return plan @@ -304,44 +274,6 @@ func normalizeServiceAccountName(name string) string { return "" } -// tlsKeyfileNeedsRenewal decides if the certificate in the given keyfile -// should be renewed. -func tlsKeyfileNeedsRenewal(log zerolog.Logger, keyfile string) bool { - raw := []byte(keyfile) - for { - var derBlock *pem.Block - derBlock, raw = pem.Decode(raw) - if derBlock == nil { - break - } - if derBlock.Type == "CERTIFICATE" { - cert, err := x509.ParseCertificate(derBlock.Bytes) - if err != nil { - // We do not understand the certificate, let's renew it - log.Warn().Err(err).Msg("Failed to parse x509 certificate. Renewing it") - return true - } - if cert.IsCA { - // Only look at the server certificate, not CA or intermediate - continue - } - // Check expiration date. Renewal at 2/3 of lifetime. - ttl := cert.NotAfter.Sub(cert.NotBefore) - expirationDate := cert.NotBefore.Add((ttl / 3) * 2) - if expirationDate.Before(time.Now()) { - // We should renew now - log.Debug(). - Str("not-before", cert.NotBefore.String()). - Str("not-after", cert.NotAfter.String()). - Str("expiration-date", expirationDate.String()). - Msg("TLS certificate renewal needed") - return true - } - } - } - return false -} - // createScalePlan creates a scaling plan for a single server group func createScalePlan(log zerolog.Logger, members api.MemberStatusList, group api.ServerGroup, count int) api.Plan { var plan api.Plan diff --git a/pkg/deployment/reconcile/plan_builder_test.go b/pkg/deployment/reconcile/plan_builder_test.go index c43c5fdbe..9948ae722 100644 --- a/pkg/deployment/reconcile/plan_builder_test.go +++ b/pkg/deployment/reconcile/plan_builder_test.go @@ -40,6 +40,9 @@ func TestCreatePlanSingleScale(t *testing.T) { getTLSKeyfile := func(group api.ServerGroup, member api.MemberStatus) (string, error) { return "", maskAny(fmt.Errorf("Not implemented")) } + getTLSCA := func(string) (string, string, bool, error) { + return "", "", false, maskAny(fmt.Errorf("Not implemented")) + } log := zerolog.Nop() spec := api.DeploymentSpec{ Mode: api.NewMode(api.DeploymentModeSingle), @@ -55,7 +58,7 @@ func TestCreatePlanSingleScale(t *testing.T) { // Test with empty status var status api.DeploymentStatus - newPlan, changed := createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile) + newPlan, changed := createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile, getTLSCA) assert.True(t, changed) assert.Len(t, newPlan, 0) // Single mode does not scale @@ -66,7 +69,7 @@ func TestCreatePlanSingleScale(t *testing.T) { PodName: "something", }, } - newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile) + newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile, getTLSCA) assert.True(t, changed) assert.Len(t, newPlan, 0) // Single mode does not scale @@ -81,7 +84,7 @@ func TestCreatePlanSingleScale(t *testing.T) { PodName: "something1", }, } - newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile) + newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile, getTLSCA) assert.True(t, changed) assert.Len(t, newPlan, 0) // Single mode does not scale } @@ -91,6 +94,9 @@ func TestCreatePlanActiveFailoverScale(t *testing.T) { getTLSKeyfile := func(group api.ServerGroup, member api.MemberStatus) (string, error) { return "", maskAny(fmt.Errorf("Not implemented")) } + getTLSCA := func(string) (string, string, bool, error) { + return "", "", false, maskAny(fmt.Errorf("Not implemented")) + } log := zerolog.Nop() spec := api.DeploymentSpec{ Mode: api.NewMode(api.DeploymentModeActiveFailover), @@ -107,7 +113,7 @@ func TestCreatePlanActiveFailoverScale(t *testing.T) { // Test with empty status var status api.DeploymentStatus - newPlan, changed := createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile) + newPlan, changed := createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile, getTLSCA) assert.True(t, changed) require.Len(t, newPlan, 2) assert.Equal(t, api.ActionTypeAddMember, newPlan[0].Type) @@ -120,7 +126,7 @@ func TestCreatePlanActiveFailoverScale(t *testing.T) { PodName: "something", }, } - newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile) + newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile, getTLSCA) assert.True(t, changed) require.Len(t, newPlan, 1) assert.Equal(t, api.ActionTypeAddMember, newPlan[0].Type) @@ -145,7 +151,7 @@ func TestCreatePlanActiveFailoverScale(t *testing.T) { PodName: "something4", }, } - newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile) + newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile, getTLSCA) assert.True(t, changed) require.Len(t, newPlan, 2) // Note: Downscaling is only down 1 at a time assert.Equal(t, api.ActionTypeShutdownMember, newPlan[0].Type) @@ -159,6 +165,9 @@ func TestCreatePlanClusterScale(t *testing.T) { getTLSKeyfile := func(group api.ServerGroup, member api.MemberStatus) (string, error) { return "", maskAny(fmt.Errorf("Not implemented")) } + getTLSCA := func(string) (string, string, bool, error) { + return "", "", false, maskAny(fmt.Errorf("Not implemented")) + } log := zerolog.Nop() spec := api.DeploymentSpec{ Mode: api.NewMode(api.DeploymentModeCluster), @@ -174,7 +183,7 @@ func TestCreatePlanClusterScale(t *testing.T) { // Test with empty status var status api.DeploymentStatus - newPlan, changed := createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile) + newPlan, changed := createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile, getTLSCA) assert.True(t, changed) require.Len(t, newPlan, 6) // Adding 3 dbservers & 3 coordinators (note: agents do not scale now) assert.Equal(t, api.ActionTypeAddMember, newPlan[0].Type) @@ -207,7 +216,7 @@ func TestCreatePlanClusterScale(t *testing.T) { PodName: "coordinator1", }, } - newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile) + newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile, getTLSCA) assert.True(t, changed) require.Len(t, newPlan, 3) assert.Equal(t, api.ActionTypeAddMember, newPlan[0].Type) @@ -244,7 +253,7 @@ func TestCreatePlanClusterScale(t *testing.T) { } spec.DBServers.Count = util.NewInt(1) spec.Coordinators.Count = util.NewInt(1) - newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile) + newPlan, changed = createPlan(log, depl, nil, spec, status, nil, getTLSKeyfile, getTLSCA) assert.True(t, changed) require.Len(t, newPlan, 5) // Note: Downscaling is done 1 at a time assert.Equal(t, api.ActionTypeCleanOutMember, newPlan[0].Type) diff --git a/pkg/deployment/reconcile/plan_builder_tls.go b/pkg/deployment/reconcile/plan_builder_tls.go new file mode 100644 index 000000000..1c0e5f39a --- /dev/null +++ b/pkg/deployment/reconcile/plan_builder_tls.go @@ -0,0 +1,237 @@ +// +// DISCLAIMER +// +// Copyright 2018 ArangoDB GmbH, Cologne, Germany +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright holder is ArangoDB GmbH, Cologne, Germany +// +// Author Ewout Prangsma +// + +package reconcile + +import ( + "crypto/x509" + "encoding/pem" + "net" + "time" + + api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha" + "github.com/rs/zerolog" +) + +// createRotateTLSServerCertificatePlan creates plan to rotate a server because of an (soon to be) expired TLS certificate. +func createRotateTLSServerCertificatePlan(log zerolog.Logger, spec api.DeploymentSpec, status api.DeploymentStatus, + getTLSKeyfile func(group api.ServerGroup, member api.MemberStatus) (string, error)) api.Plan { + if !spec.TLS.IsSecure() { + return nil + } + var plan api.Plan + status.Members.ForeachServerGroup(func(group api.ServerGroup, members api.MemberStatusList) error { + for _, m := range members { + if len(plan) > 0 { + // Only 1 change at a time + continue + } + if m.Phase != api.MemberPhaseCreated { + // Only make changes when phase is created + continue + } + if group == api.ServerGroupSyncWorkers { + // SyncWorkers have no externally created TLS keyfile + continue + } + // Load keyfile + keyfile, err := getTLSKeyfile(group, m) + if err != nil { + log.Warn().Err(err). + Str("role", group.AsRole()). + Str("id", m.ID). + Msg("Failed to get TLS secret") + continue + } + renewalNeeded := tlsKeyfileNeedsRenewal(log, keyfile) + if renewalNeeded { + plan = append(append(plan, + api.NewAction(api.ActionTypeRenewTLSCertificate, group, m.ID)), + createRotateMemberPlan(log, m, group, "TLS certificate renewal")..., + ) + } + } + return nil + }) + return plan +} + +// createRotateTLSCAPlan creates plan to replace a TLS CA and rotate all server. +func createRotateTLSCAPlan(log zerolog.Logger, spec api.DeploymentSpec, status api.DeploymentStatus, + getTLSCA func(string) (string, string, bool, error)) api.Plan { + if !spec.TLS.IsSecure() { + return nil + } + secretName := spec.TLS.GetCASecretName() + cert, _, isOwned, err := getTLSCA(secretName) + if err != nil { + log.Warn().Err(err).Str("secret-name", secretName).Msg("Failed to fetch TLS CA secret") + return nil + } + if !isOwned { + // TLS CA is not owned by the deployment, we cannot change it + return nil + } + var plan api.Plan + if renewalNeeded, reason := tlsCANeedsRenewal(log, cert, spec.TLS); renewalNeeded { + var planSuffix api.Plan + plan = append(plan, + api.NewAction(api.ActionTypeRenewTLSCACertificate, 0, "", reason), + ) + status.Members.ForeachServerGroup(func(group api.ServerGroup, members api.MemberStatusList) error { + for _, m := range members { + if m.Phase != api.MemberPhaseCreated { + // Only make changes when phase is created + continue + } + if !group.IsArangod() { + // Sync master/worker is not applicable here + continue + } + plan = append(plan, + api.NewAction(api.ActionTypeRenewTLSCertificate, group, m.ID), + api.NewAction(api.ActionTypeRotateMember, group, m.ID, "TLS CA certificate changed"), + ) + planSuffix = append(planSuffix, + api.NewAction(api.ActionTypeWaitForMemberUp, group, m.ID, "TLS CA certificate changed"), + ) + } + return nil + }) + plan = append(plan, planSuffix...) + } + return plan +} + +// tlsKeyfileNeedsRenewal decides if the certificate in the given keyfile +// should be renewed. +func tlsKeyfileNeedsRenewal(log zerolog.Logger, keyfile string) bool { + raw := []byte(keyfile) + for { + var derBlock *pem.Block + derBlock, raw = pem.Decode(raw) + if derBlock == nil { + break + } + if derBlock.Type == "CERTIFICATE" { + cert, err := x509.ParseCertificate(derBlock.Bytes) + if err != nil { + // We do not understand the certificate, let's renew it + log.Warn().Err(err).Msg("Failed to parse x509 certificate. Renewing it") + return true + } + if cert.IsCA { + // Only look at the server certificate, not CA or intermediate + continue + } + // Check expiration date. Renewal at 2/3 of lifetime. + ttl := cert.NotAfter.Sub(cert.NotBefore) + expirationDate := cert.NotBefore.Add((ttl / 3) * 2) + if expirationDate.Before(time.Now()) { + // We should renew now + log.Debug(). + Str("not-before", cert.NotBefore.String()). + Str("not-after", cert.NotAfter.String()). + Str("expiration-date", expirationDate.String()). + Msg("TLS certificate renewal needed") + return true + } + } + } + return false +} + +// tlsCANeedsRenewal decides if the given CA certificate +// should be renewed. +// Returns: shouldRenew, reason +func tlsCANeedsRenewal(log zerolog.Logger, cert string, spec api.TLSSpec) (bool, string) { + raw := []byte(cert) + // containsAll returns true when all elements in the expected list + // are in the actual list. + containsAll := func(actual []string, expected []string) bool { + for _, x := range expected { + found := false + for _, y := range actual { + if x == y { + found = true + break + } + } + if !found { + return false + } + } + return true + } + ipsToStringSlice := func(list []net.IP) []string { + result := make([]string, len(list)) + for i, x := range list { + result[i] = x.String() + } + return result + } + for { + var derBlock *pem.Block + derBlock, raw = pem.Decode(raw) + if derBlock == nil { + break + } + if derBlock.Type == "CERTIFICATE" { + cert, err := x509.ParseCertificate(derBlock.Bytes) + if err != nil { + // We do not understand the certificate, let's renew it + log.Warn().Err(err).Msg("Failed to parse x509 certificate. Renewing it") + return true, "Cannot parse x509 certificate: " + err.Error() + } + if !cert.IsCA { + // Only look at the CA certificate + continue + } + // Check expiration date. Renewal at 90% of lifetime. + ttl := cert.NotAfter.Sub(cert.NotBefore) + expirationDate := cert.NotBefore.Add((ttl / 10) * 9) + if expirationDate.Before(time.Now()) { + // We should renew now + log.Debug(). + Str("not-before", cert.NotBefore.String()). + Str("not-after", cert.NotAfter.String()). + Str("expiration-date", expirationDate.String()). + Msg("TLS CA certificate renewal needed") + return true, "CA Certificate about to expire" + } + // Check alternate names against spec + dnsNames, ipAddresses, emailAddress, err := spec.GetParsedAltNames() + if err == nil { + if !containsAll(cert.DNSNames, dnsNames) { + return true, "Some alternate DNS names are missing" + } + if !containsAll(ipsToStringSlice(cert.IPAddresses), ipAddresses) { + return true, "Some alternate IP addresses are missing" + } + if !containsAll(cert.EmailAddresses, emailAddress) { + return true, "Some alternate email addresses are missing" + } + } + } + } + return false, "" +} diff --git a/pkg/deployment/reconcile/plan_executor.go b/pkg/deployment/reconcile/plan_executor.go index 81ff7570d..763d308aa 100644 --- a/pkg/deployment/reconcile/plan_executor.go +++ b/pkg/deployment/reconcile/plan_executor.go @@ -171,6 +171,8 @@ func (d *Reconciler) createAction(ctx context.Context, log zerolog.Logger, actio return NewWaitForMemberUpAction(log, action, actionCtx) case api.ActionTypeRenewTLSCertificate: return NewRenewTLSCertificateAction(log, action, actionCtx) + case api.ActionTypeRenewTLSCACertificate: + return NewRenewTLSCACertificateAction(log, action, actionCtx) default: panic(fmt.Sprintf("Unknown action type '%s'", action.Type)) } diff --git a/pkg/deployment/reconcile/timeouts.go b/pkg/deployment/reconcile/timeouts.go index f7c31a3ce..a2273d18d 100644 --- a/pkg/deployment/reconcile/timeouts.go +++ b/pkg/deployment/reconcile/timeouts.go @@ -25,12 +25,13 @@ package reconcile import "time" const ( - addMemberTimeout = time.Minute * 5 - cleanoutMemberTimeout = time.Hour * 12 - removeMemberTimeout = time.Minute * 15 - renewTLSCertificateTimeout = time.Minute * 30 - rotateMemberTimeout = time.Minute * 30 - shutdownMemberTimeout = time.Minute * 30 - upgradeMemberTimeout = time.Hour * 6 - waitForMemberUpTimeout = time.Minute * 15 + addMemberTimeout = time.Minute * 5 + cleanoutMemberTimeout = time.Hour * 12 + removeMemberTimeout = time.Minute * 15 + renewTLSCertificateTimeout = time.Minute * 30 + renewTLSCACertificateTimeout = time.Minute * 30 + rotateMemberTimeout = time.Minute * 30 + shutdownMemberTimeout = time.Minute * 30 + upgradeMemberTimeout = time.Hour * 6 + waitForMemberUpTimeout = time.Minute * 15 ) diff --git a/pkg/deployment/resources/certificates_client_auth.go b/pkg/deployment/resources/certificates_client_auth.go index b5e494632..d3f3be143 100644 --- a/pkg/deployment/resources/certificates_client_auth.go +++ b/pkg/deployment/resources/certificates_client_auth.go @@ -74,7 +74,7 @@ func createClientAuthCACertificate(log zerolog.Logger, cli v1.CoreV1Interface, s func createClientAuthCertificateKeyfile(log zerolog.Logger, cli v1.CoreV1Interface, commonName string, ttl time.Duration, spec api.SyncAuthenticationSpec, secretName, namespace string, ownerRef *metav1.OwnerReference) error { log = log.With().Str("secret", secretName).Logger() // Load CA certificate - caCert, caKey, err := k8sutil.GetCASecret(cli, spec.GetClientCASecretName(), namespace) + caCert, caKey, _, err := k8sutil.GetCASecret(cli, spec.GetClientCASecretName(), namespace, nil) if err != nil { log.Debug().Err(err).Msg("Failed to load CA certificate") return maskAny(err) diff --git a/pkg/deployment/resources/certificates_tls.go b/pkg/deployment/resources/certificates_tls.go index f0e5e8d88..678edbcf6 100644 --- a/pkg/deployment/resources/certificates_tls.go +++ b/pkg/deployment/resources/certificates_tls.go @@ -89,7 +89,7 @@ func createTLSServerCertificate(log zerolog.Logger, cli v1.CoreV1Interface, serv } // Load CA certificate - caCert, caKey, err := k8sutil.GetCASecret(cli, spec.GetCASecretName(), namespace) + caCert, caKey, _, err := k8sutil.GetCASecret(cli, spec.GetCASecretName(), namespace, nil) if err != nil { log.Debug().Err(err).Msg("Failed to load CA certificate") return maskAny(err) diff --git a/pkg/util/k8sutil/secrets.go b/pkg/util/k8sutil/secrets.go index d5283cac3..a2db083b7 100644 --- a/pkg/util/k8sutil/secrets.go +++ b/pkg/util/k8sutil/secrets.go @@ -108,22 +108,31 @@ func GetCACertficateSecret(cli corev1.CoreV1Interface, secretName, namespace str // and extracts the `ca.crt` & `ca.key` field. // If the secret does not exists or one of the fields is missing, // an error is returned. -// Returns: certificate, private-key, error -func GetCASecret(cli corev1.CoreV1Interface, secretName, namespace string) (string, string, error) { +// Returns: certificate, private-key, isOwnedByDeployment, error +func GetCASecret(cli corev1.CoreV1Interface, secretName, namespace string, ownerRef *metav1.OwnerReference) (string, string, bool, error) { s, err := cli.Secrets(namespace).Get(secretName, metav1.GetOptions{}) if err != nil { - return "", "", maskAny(err) + return "", "", false, maskAny(err) + } + isOwned := false + if ownerRef != nil { + for _, x := range s.GetOwnerReferences() { + if x.UID == ownerRef.UID { + isOwned = true + break + } + } } // Load `ca.crt` field cert, found := s.Data[constants.SecretCACertificate] if !found { - return "", "", maskAny(fmt.Errorf("No '%s' found in secret '%s'", constants.SecretCACertificate, secretName)) + return "", "", isOwned, maskAny(fmt.Errorf("No '%s' found in secret '%s'", constants.SecretCACertificate, secretName)) } priv, found := s.Data[constants.SecretCAKey] if !found { - return "", "", maskAny(fmt.Errorf("No '%s' found in secret '%s'", constants.SecretCAKey, secretName)) + return "", "", isOwned, maskAny(fmt.Errorf("No '%s' found in secret '%s'", constants.SecretCAKey, secretName)) } - return string(cert), string(priv), nil + return string(cert), string(priv), isOwned, nil } // CreateCASecret creates a secret used to store a PEM encoded CA certificate & private key.