From 032065277f6c27a871fec139e8bac826035714a6 Mon Sep 17 00:00:00 2001 From: Ewout Prangsma Date: Tue, 19 Jun 2018 08:30:20 +0200 Subject: [PATCH] Adding downtimeAllowed field --- .../Kubernetes/DeploymentResource.md | 17 ++++++ .../deployment/v1alpha/deployment_spec.go | 9 +++ .../v1alpha/zz_generated.deepcopy.go | 9 +++ pkg/deployment/reconcile/plan_builder.go | 4 +- .../reconcile/plan_builder_storage.go | 2 +- pkg/deployment/reconcile/plan_builder_tls.go | 61 +++++++++++-------- pkg/util/k8sutil/events.go | 12 +++- 7 files changed, 84 insertions(+), 30 deletions(-) diff --git a/docs/Manual/Deployment/Kubernetes/DeploymentResource.md b/docs/Manual/Deployment/Kubernetes/DeploymentResource.md index 7d56fe50a..d0fbf6e83 100644 --- a/docs/Manual/Deployment/Kubernetes/DeploymentResource.md +++ b/docs/Manual/Deployment/Kubernetes/DeploymentResource.md @@ -107,6 +107,23 @@ Possible values are: This setting cannot be changed after the cluster has been created. +### `spec.downtimeAllowed: bool` + +This setting is used to allow automatic reconciliation actions that yield +some downtime of the ArangoDB deployment. +When this setting is set to `false` (the default), no automatic action that +may result in downtime is allowed. +If the need for such an action is detected, an event is added to the `ArangoDeployment`. + +Once this setting is set to `true`, the automatic action is executed. + +Operations that may result in downtime are: + +- Rotating TLS CA certificate + +Note: It is still possible that there is some downtime when the Kubernetes +cluster is down, or in a bad state, irrespective of the value of this setting. 
+ ### `spec.rocksdb.encryption.keySecretName` This setting specifies the name of a kubernetes `Secret` that contains diff --git a/pkg/apis/deployment/v1alpha/deployment_spec.go b/pkg/apis/deployment/v1alpha/deployment_spec.go index 6c4ccaa8e..aff2041a0 100644 --- a/pkg/apis/deployment/v1alpha/deployment_spec.go +++ b/pkg/apis/deployment/v1alpha/deployment_spec.go @@ -50,6 +50,7 @@ type DeploymentSpec struct { StorageEngine *StorageEngine `json:"storageEngine,omitempty"` Image *string `json:"image,omitempty"` ImagePullPolicy *v1.PullPolicy `json:"imagePullPolicy,omitempty"` + DowntimeAllowed *bool `json:"downtimeAllowed,omitempty"` ExternalAccess ExternalAccessSpec `json:"externalAccess"` RocksDB RocksDBSpec `json:"rocksdb"` @@ -92,6 +93,11 @@ func (s DeploymentSpec) GetImagePullPolicy() v1.PullPolicy { return util.PullPolicyOrDefault(s.ImagePullPolicy) } +// IsDowntimeAllowed returns the value of downtimeAllowed. +func (s DeploymentSpec) IsDowntimeAllowed() bool { + return util.BoolOrDefault(s.DowntimeAllowed) +} + // IsAuthenticated returns true when authentication is enabled func (s DeploymentSpec) IsAuthenticated() bool { return s.Authentication.IsAuthenticated() @@ -171,6 +177,9 @@ func (s *DeploymentSpec) SetDefaultsFrom(source DeploymentSpec) { if s.ImagePullPolicy == nil { s.ImagePullPolicy = util.NewPullPolicyOrNil(source.ImagePullPolicy) } + if s.DowntimeAllowed == nil { + s.DowntimeAllowed = util.NewBoolOrNil(source.DowntimeAllowed) + } s.ExternalAccess.SetDefaultsFrom(source.ExternalAccess) s.RocksDB.SetDefaultsFrom(source.RocksDB) s.Authentication.SetDefaultsFrom(source.Authentication) diff --git a/pkg/apis/deployment/v1alpha/zz_generated.deepcopy.go b/pkg/apis/deployment/v1alpha/zz_generated.deepcopy.go index 0b46c7a8a..c8d083f60 100644 --- a/pkg/apis/deployment/v1alpha/zz_generated.deepcopy.go +++ b/pkg/apis/deployment/v1alpha/zz_generated.deepcopy.go @@ -253,6 +253,15 @@ func (in *DeploymentSpec) DeepCopyInto(out *DeploymentSpec) { **out = **in } } + 
if in.DowntimeAllowed != nil { + in, out := &in.DowntimeAllowed, &out.DowntimeAllowed + if *in == nil { + *out = nil + } else { + *out = new(bool) + **out = **in + } + } in.ExternalAccess.DeepCopyInto(&out.ExternalAccess) in.RocksDB.DeepCopyInto(&out.RocksDB) in.Authentication.DeepCopyInto(&out.Authentication) diff --git a/pkg/deployment/reconcile/plan_builder.go b/pkg/deployment/reconcile/plan_builder.go index 484f05013..9057a6d71 100644 --- a/pkg/deployment/reconcile/plan_builder.go +++ b/pkg/deployment/reconcile/plan_builder.go @@ -82,7 +82,7 @@ func createPlan(log zerolog.Logger, apiObject k8sutil.APIObject, getTLSKeyfile func(group api.ServerGroup, member api.MemberStatus) (string, error), getTLSCA func(string) (string, string, bool, error), getPVC func(pvcName string) (*v1.PersistentVolumeClaim, error), - createEvent func(evt *v1.Event)) (api.Plan, bool) { + createEvent func(evt *k8sutil.Event)) (api.Plan, bool) { if len(currentPlan) > 0 { // Plan already exists, complete that first return currentPlan, false @@ -189,7 +189,7 @@ func createPlan(log zerolog.Logger, apiObject k8sutil.APIObject, // Check for the need to rotate TLS CA certificate and all members if len(plan) == 0 { - plan = createRotateTLSCAPlan(log, spec, status, getTLSCA) + plan = createRotateTLSCAPlan(log, apiObject, spec, status, getTLSCA, createEvent) } // Return plan diff --git a/pkg/deployment/reconcile/plan_builder_storage.go b/pkg/deployment/reconcile/plan_builder_storage.go index d0f93440c..87090785c 100644 --- a/pkg/deployment/reconcile/plan_builder_storage.go +++ b/pkg/deployment/reconcile/plan_builder_storage.go @@ -35,7 +35,7 @@ import ( // different storage class or a difference in storage resource requirements. 
func createRotateServerStoragePlan(log zerolog.Logger, apiObject k8sutil.APIObject, spec api.DeploymentSpec, status api.DeploymentStatus, getPVC func(pvcName string) (*v1.PersistentVolumeClaim, error), - createEvent func(evt *v1.Event)) api.Plan { + createEvent func(evt *k8sutil.Event)) api.Plan { if spec.GetMode() == api.DeploymentModeSingle { // Storage cannot be changed in single server deployments return nil diff --git a/pkg/deployment/reconcile/plan_builder_tls.go b/pkg/deployment/reconcile/plan_builder_tls.go index 1c0e5f39a..8c46801a9 100644 --- a/pkg/deployment/reconcile/plan_builder_tls.go +++ b/pkg/deployment/reconcile/plan_builder_tls.go @@ -29,6 +29,7 @@ import ( "time" api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha" + "github.com/arangodb/kube-arangodb/pkg/util/k8sutil" "github.com/rs/zerolog" ) @@ -76,8 +77,10 @@ func createRotateTLSServerCertificatePlan(log zerolog.Logger, spec api.Deploymen } // createRotateTLSCAPlan creates plan to replace a TLS CA and rotate all server. 
-func createRotateTLSCAPlan(log zerolog.Logger, spec api.DeploymentSpec, status api.DeploymentStatus, - getTLSCA func(string) (string, string, bool, error)) api.Plan { +func createRotateTLSCAPlan(log zerolog.Logger, apiObject k8sutil.APIObject, + spec api.DeploymentSpec, status api.DeploymentStatus, + getTLSCA func(string) (string, string, bool, error), + createEvent func(evt *k8sutil.Event)) api.Plan { if !spec.TLS.IsSecure() { return nil } @@ -93,31 +96,37 @@ func createRotateTLSCAPlan(log zerolog.Logger, spec api.DeploymentSpec, status a } var plan api.Plan if renewalNeeded, reason := tlsCANeedsRenewal(log, cert, spec.TLS); renewalNeeded { - var planSuffix api.Plan - plan = append(plan, - api.NewAction(api.ActionTypeRenewTLSCACertificate, 0, "", reason), - ) - status.Members.ForeachServerGroup(func(group api.ServerGroup, members api.MemberStatusList) error { - for _, m := range members { - if m.Phase != api.MemberPhaseCreated { - // Only make changes when phase is created - continue + if spec.IsDowntimeAllowed() { + var planSuffix api.Plan + plan = append(plan, + api.NewAction(api.ActionTypeRenewTLSCACertificate, 0, "", reason), + ) + status.Members.ForeachServerGroup(func(group api.ServerGroup, members api.MemberStatusList) error { + for _, m := range members { + if m.Phase != api.MemberPhaseCreated { + // Only make changes when phase is created + continue + } + if !group.IsArangod() { + // Sync master/worker is not applicable here + continue + } + plan = append(plan, + api.NewAction(api.ActionTypeRenewTLSCertificate, group, m.ID), + api.NewAction(api.ActionTypeRotateMember, group, m.ID, "TLS CA certificate changed"), + ) + planSuffix = append(planSuffix, + api.NewAction(api.ActionTypeWaitForMemberUp, group, m.ID, "TLS CA certificate changed"), + ) } - if !group.IsArangod() { - // Sync master/worker is not applicable here - continue - } - plan = append(plan, - api.NewAction(api.ActionTypeRenewTLSCertificate, group, m.ID), - 
api.NewAction(api.ActionTypeRotateMember, group, m.ID, "TLS CA certificate changed"), - ) - planSuffix = append(planSuffix, - api.NewAction(api.ActionTypeWaitForMemberUp, group, m.ID, "TLS CA certificate changed"), - ) - } - return nil - }) - plan = append(plan, planSuffix...) + return nil + }) + plan = append(plan, planSuffix...) + } else { + // Rotating the CA results in downtime. + // That is currently not allowed. + createEvent(k8sutil.NewDowntimeNotAllowedEvent(apiObject, "Rotate TLS CA")) + } } return plan } diff --git a/pkg/util/k8sutil/events.go b/pkg/util/k8sutil/events.go index 65efe643d..201d4c3ba 100644 --- a/pkg/util/k8sutil/events.go +++ b/pkg/util/k8sutil/events.go @@ -173,7 +173,7 @@ func NewPlanAbortedEvent(apiObject APIObject, itemType, memberID, role string) * // NewCannotChangeStorageClassEvent creates an event indicating that an item would need to use a different StorageClass, // but this is not possible for the given reason. -func NewCannotChangeStorageClassEvent(apiObject APIObject, memberID, role, subReason string) *v1.Event { +func NewCannotChangeStorageClassEvent(apiObject APIObject, memberID, role, subReason string) *Event { event := newDeploymentEvent(apiObject) event.Type = v1.EventTypeNormal event.Reason = fmt.Sprintf("%s Member StorageClass Cannot Change", strings.Title(role)) @@ -181,6 +181,16 @@ func NewCannotChangeStorageClassEvent(apiObject APIObject, memberID, role, subRe return event } +// NewDowntimeNotAllowedEvent creates an event indicating that an operation cannot be executed because downtime +// is currently not allowed. +func NewDowntimeNotAllowedEvent(apiObject APIObject, operation string) *Event { + event := newDeploymentEvent(apiObject) + event.Type = v1.EventTypeNormal + event.Reason = "Downtime Operation Postponed" + event.Message = fmt.Sprintf("The '%s' operation is postponed because downtime is not allowed. 
Set `spec.downtimeAllowed` to true to execute this operation", operation) + return event +} + // NewErrorEvent creates an even of type error. func NewErrorEvent(reason string, err error, apiObject APIObject) *Event { event := newDeploymentEvent(apiObject)