Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
- (Bugfix) Add DistributeShardsLike support
- (Feature) Member restarts metric
- (Bugfix) Infinite loop fix in ArangoD AsyncClient
- (Bugfix) Add Panic Handler

## [1.2.13](https://github.com/arangodb/kube-arangodb/tree/1.2.13) (2022-06-07)
- (Bugfix) Fix arangosync members state inspection
Expand Down
1 change: 1 addition & 0 deletions docs/generated/metrics/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
| [arangodb_operator_agency_cache_member_serving](./arangodb_operator_agency_cache_member_serving.md) | arangodb_operator | agency_cache | Gauge | Determines if agency member is reachable |
| [arangodb_operator_agency_cache_present](./arangodb_operator_agency_cache_present.md) | arangodb_operator | agency_cache | Gauge | Determines if local agency cache is present |
| [arangodb_operator_agency_cache_serving](./arangodb_operator_agency_cache_serving.md) | arangodb_operator | agency_cache | Gauge | Determines if agency is serving |
| [arangodb_operator_engine_panics_recovered](./arangodb_operator_engine_panics_recovered.md) | arangodb_operator | engine | Counter | Number of Panics recovered inside Operator reconciliation loop |
| [arangodb_operator_members_unexpected_container_exit_codes](./arangodb_operator_members_unexpected_container_exit_codes.md) | arangodb_operator | members | Counter | Counter of unexpected restarts in pod (Containers/InitContainers/EphemeralContainers) |
| [arangodb_operator_rebalancer_enabled](./arangodb_operator_rebalancer_enabled.md) | arangodb_operator | rebalancer | Gauge | Determines if rebalancer is enabled |
| [arangodb_operator_rebalancer_moves_current](./arangodb_operator_rebalancer_moves_current.md) | arangodb_operator | rebalancer | Gauge | Define how many moves are currently in progress |
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# arangodb_operator_engine_panics_recovered (Counter)

## Description

Number of Panics recovered inside Operator reconciliation loop. Section represents recovery section

## Labels

| Label | Description |
|:-------:|:--------------|
| section | Panic Section |
11 changes: 10 additions & 1 deletion internal/metrics.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -166,4 +166,13 @@ namespaces:
- key: container_type
description: "Container/InitContainer/EphemeralContainer"
- key: code
description: "ExitCode"
description: "ExitCode"
engine:
panics_recovered:
shortDescription: "Number of Panics recovered inside Operator reconciliation loop"
description: "Number of Panics recovered inside Operator reconciliation loop. Section represents recovery section"
type: "Counter"
labels:
- key: section
description: "Panic Section"

23 changes: 21 additions & 2 deletions pkg/deployment/reconcile/plan_executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
"github.com/arangodb/kube-arangodb/pkg/metrics"
"github.com/arangodb/kube-arangodb/pkg/util/errors"
"github.com/arangodb/kube-arangodb/pkg/util/errors/panics"
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
)

Expand Down Expand Up @@ -305,7 +306,7 @@ func (d *Reconciler) executeAction(ctx context.Context, planAction api.Action, a

if !planAction.IsStarted() {
// Not started yet
ready, err := action.Start(ctx)
ready, err := d.executeActionStart(ctx, action)
if err != nil {
if g := getStartFailureGracePeriod(action); g > 0 && !planAction.CreationTime.IsZero() {
if time.Since(planAction.CreationTime.Time) < g {
Expand Down Expand Up @@ -333,7 +334,7 @@ func (d *Reconciler) executeAction(ctx context.Context, planAction api.Action, a
}

// First action of plan has been started, check its progress
ready, abort, err := action.CheckProgress(ctx)
ready, abort, err := d.executeActionCheckProgress(ctx, action)
if err != nil {
log.Err(err).Debug("Failed to check action progress")
return false, false, false, false, errors.WithStack(err)
Expand Down Expand Up @@ -362,6 +363,24 @@ func (d *Reconciler) executeAction(ctx context.Context, planAction api.Action, a
return false, false, true, false, nil
}

// executeActionCheckProgress calls action.CheckProgress(ctx) from inside
// panics.RecoverWithSection, tagged with the "ActionProgress" section.
//
// The ready and abort results are written by the wrapped call; if that call
// does not return normally they keep their zero values (false, false).
// retErr is whatever panics.RecoverWithSection returns.
//
// NOTE(review): presumably RecoverWithSection turns a panic raised by the
// action into an error and records it under the given section — confirm
// against the panics package.
func (d *Reconciler) executeActionCheckProgress(ctx context.Context, action Action) (ready bool, abort bool, retErr error) {
	checkProgress := func() error {
		var progressErr error
		ready, abort, progressErr = action.CheckProgress(ctx)
		return progressErr
	}

	retErr = panics.RecoverWithSection("ActionProgress", checkProgress)
	return ready, abort, retErr
}

// executeActionStart calls action.Start(ctx) from inside
// panics.RecoverWithSection, tagged with the "ActionStart" section.
//
// The done result is written by the wrapped call; if that call does not
// return normally it keeps its zero value (false). retErr is whatever
// panics.RecoverWithSection returns.
//
// NOTE(review): presumably RecoverWithSection turns a panic raised by the
// action into an error and records it under the given section — confirm
// against the panics package.
func (d *Reconciler) executeActionStart(ctx context.Context, action Action) (done bool, retErr error) {
	start := func() error {
		var startErr error
		done, startErr = action.Start(ctx)
		return startErr
	}

	retErr = panics.RecoverWithSection("ActionStart", start)
	return done, retErr
}

// createAction creates an action object based on the action type
func (d *Reconciler) createAction(action api.Action) (Action, ActionContext) {
actionCtx := newActionContext(d.log, d.context, &d.metrics)
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

175 changes: 0 additions & 175 deletions pkg/logging/logger_test.go

This file was deleted.

Loading