Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions pkg/deployment/deployment_inspector.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,12 @@ func (d *Deployment) inspectDeployment(lastInterval util.Interval) util.Interval
d.CreateEvent(k8sutil.NewErrorEvent("Member failure detection failed", err, d.apiObject))
}

// Immediate actions
if err := d.reconciler.CheckDeployment(); err != nil {
hasError = true
d.CreateEvent(k8sutil.NewErrorEvent("Reconciler immediate actions failed", err, d.apiObject))
}

// Create scale/update plan
if err := d.reconciler.CreatePlan(); err != nil {
hasError = true
Expand Down
36 changes: 35 additions & 1 deletion pkg/deployment/reconcile/reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,10 @@

package reconcile

import "github.com/rs/zerolog"
import (
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha"
"github.com/rs/zerolog"
)

// Reconciler is the service that takes care of bringing a deployment
// in line with its (changed) specification.
Expand All @@ -38,3 +41,34 @@ func NewReconciler(log zerolog.Logger, context Context) *Reconciler {
context: context,
}
}

// CheckDeployment looks for obviously broken conditions in the deployment
// and repairs them right away.
//
// Only deployment modes that have coordinators are inspected:
//   - when the status lists no coordinator members at all, the creation of
//     one new coordinator member is requested from the context;
//   - when every listed coordinator has failed, the pod of each coordinator
//     is deleted and the member's phase is reset to MemberPhaseNone.
//
// The only error returned is a failure to create the new coordinator member.
// Failures while deleting pods or updating members are logged and otherwise
// ignored, so that the remaining coordinators are still processed.
//
// NOTE(review): the phase resets are applied to the status value obtained
// from GetStatus; nothing in this function writes that status back through
// the context - confirm that it is persisted elsewhere.
func (r *Reconciler) CheckDeployment() error {
	spec := r.context.GetSpec()
	// The second value returned by GetStatus is not needed here.
	status, _ := r.context.GetStatus()

	// Nothing to verify for modes without coordinators.
	if !spec.GetMode().HasCoordinators() {
		return nil
	}

	switch {
	case len(status.Members.Coordinators) == 0:
		// Not a single coordinator left: ask for a new one immediately.
		r.log.Error().Msg("No Coordinator members! Create one member immediately")
		if _, err := r.context.CreateMember(api.ServerGroupCoordinators, ""); err != nil {
			return err
		}

	case status.Members.Coordinators.AllFailed():
		// Every coordinator failed: drop their pods and reset their phase.
		r.log.Error().Msg("All coordinators failed - reset")
		for _, member := range status.Members.Coordinators {
			if err := r.context.DeletePod(member.PodName); err != nil {
				r.log.Error().Err(err).Msg("Failed to delete pod")
			}
			member.Phase = api.MemberPhaseNone
			if err := status.Members.Update(member, api.ServerGroupCoordinators); err != nil {
				r.log.Error().Err(err).Msg("Failed to update member")
			}
		}
	}

	return nil
}
13 changes: 0 additions & 13 deletions pkg/deployment/resources/pod_inspector.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,19 +243,6 @@ func (r *Resources) InspectPods(ctx context.Context) (util.Interval, error) {
allMembersReady := status.Members.AllMembersReady(spec.GetMode(), spec.Sync.IsEnabled())
status.Conditions.Update(api.ConditionTypeReady, allMembersReady, "", "")

if spec.GetMode().HasCoordinators() && status.Members.Coordinators.AllFailed() {
log.Error().Msg("All coordinators failed - reset")
for _, m := range status.Members.Coordinators {
if err := r.context.DeletePod(m.PodName); err != nil {
log.Error().Err(err).Msg("Failed to delete pod")
}
m.Phase = api.MemberPhaseNone
if err := status.Members.Update(m, api.ServerGroupCoordinators); err != nil {
log.Error().Err(err).Msg("Failed to update member")
}
}
}

// Update conditions
if len(podNamesWithScheduleTimeout) > 0 {
if status.Conditions.Update(api.ConditionTypePodSchedulingFailure, true,
Expand Down