Skip to content

Commit

Permalink
Improve error logging from multiple packing errors (#552)
Browse files Browse the repository at this point in the history
  • Loading branch information
ellistarn committed Jul 28, 2021
1 parent 12329bc commit cdf88f2
Show file tree
Hide file tree
Showing 6 changed files with 35 additions and 33 deletions.
16 changes: 9 additions & 7 deletions pkg/cloudprovider/aws/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,10 @@ import (
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/aws/aws-sdk-go/service/ec2/ec2iface"
"github.com/awslabs/karpenter/pkg/cloudprovider"
"go.uber.org/multierr"

"go.uber.org/multierr"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/sets"
)

const (
Expand Down Expand Up @@ -100,10 +101,7 @@ func (p *InstanceProvider) Create(ctx context.Context,
if err != nil {
return nil, fmt.Errorf("creating fleet %w", err)
}
if count := len(createFleetOutput.Instances); count != 1 {
return nil, combineFleetErrors(createFleetOutput.Errors)
}
if count := len(createFleetOutput.Instances[0].InstanceIds); count != 1 {
if len(createFleetOutput.Instances) != 1 || len(createFleetOutput.Instances[0].InstanceIds) != 1 {
return nil, combineFleetErrors(createFleetOutput.Errors)
}
return createFleetOutput.Instances[0].InstanceIds[0], nil
Expand Down Expand Up @@ -134,8 +132,12 @@ func getInstanceID(node *v1.Node) (*string, error) {
}

// combineFleetErrors folds the errors reported by a CreateFleet call into a
// single error prefixed with "with fleet error(s), ".
//
// Error codes are de-duplicated first, so a code that EC2 repeats for many
// launch overrides is reported once instead of once per occurrence.
func combineFleetErrors(errors []*ec2.CreateFleetError) (errs error) {
	unique := sets.NewString()
	for _, err := range errors {
		// aws.StringValue instead of *err.ErrorCode: a raw dereference panics
		// when the code pointer is nil.
		unique.Insert(aws.StringValue(err.ErrorCode))
	}
	for _, errorCode := range unique.List() {
		// The code is passed as an argument, never as the format string, so a
		// stray '%' in it cannot be misread as a formatting verb.
		errs = multierr.Append(errs, fmt.Errorf("%s", errorCode))
	}
	return fmt.Errorf("with fleet error(s), %w", errs)
}
4 changes: 2 additions & 2 deletions pkg/cloudprovider/aws/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ var _ = Describe("Allocation", func() {
It("should not schedule a pod with an invalid subnet", func() {
provisioner.Spec.InstanceTypes = []string{"m5.large"} // limit instance type to simplify ConsistOf checks
ExpectCreated(env.Client, provisioner)
pods := ExpectProvisioningFailed(ctx, env.Client, controller, provisioner,
pods := ExpectProvisioningSucceeded(ctx, env.Client, controller, provisioner,
test.PendingPod(test.PodOptions{NodeSelector: map[string]string{SubnetTagKeyLabel: "Invalid"}}),
)
// Assertions
Expand Down Expand Up @@ -562,7 +562,7 @@ var _ = Describe("Allocation", func() {
})
It("should not schedule a pod with an invalid security group", func() {
ExpectCreated(env.Client, provisioner)
pods := ExpectProvisioningFailed(ctx, env.Client, controller, provisioner,
pods := ExpectProvisioningSucceeded(ctx, env.Client, controller, provisioner,
test.PendingPod(test.PodOptions{NodeSelector: map[string]string{SecurityGroupTagKeyLabel: "Invalid"}}),
)
// Assertions
Expand Down
13 changes: 6 additions & 7 deletions pkg/controllers/allocation/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,11 @@ import (
"github.com/awslabs/karpenter/pkg/apis/provisioning/v1alpha3"
"github.com/awslabs/karpenter/pkg/cloudprovider"
"github.com/awslabs/karpenter/pkg/packing"
"github.com/awslabs/karpenter/pkg/utils/result"
"go.uber.org/multierr"
"golang.org/x/time/rate"
"knative.dev/pkg/logging"

"go.uber.org/multierr"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/types"
Expand Down Expand Up @@ -93,23 +94,21 @@ func (c *Controller) Reconcile(ctx context.Context, req reconcile.Request) (reco
// 3. Filter pods
pods, err := c.Filter.GetProvisionablePods(ctx, provisioner)
if err != nil {
return reconcile.Result{}, fmt.Errorf("filtering pods, %w", err)
return result.RetryIfError(ctx, fmt.Errorf("filtering pods, %w", err))
}
if len(pods) == 0 {
return reconcile.Result{}, nil
}
logging.FromContext(ctx).Infof("Found %d provisionable pods", len(pods))

// 4. Group by constraints
constraintGroups, err := c.Constraints.Group(ctx, provisioner, pods)
if err != nil {
return reconcile.Result{}, fmt.Errorf("building constraint groups, %w", err)
return result.RetryIfError(ctx, fmt.Errorf("building constraint groups, %w", err))
}

// 5. Get Instance Types Options
instanceTypes, err := c.CloudProvider.GetInstanceTypes(ctx)
if err != nil {
return reconcile.Result{}, fmt.Errorf("getting instance types, %w", err)
return result.RetryIfError(ctx, fmt.Errorf("getting instance types, %w", err))
}

// 6. Binpack each group
Expand All @@ -128,7 +127,7 @@ func (c *Controller) Reconcile(ctx context.Context, req reconcile.Request) (reco
return c.Binder.Bind(ctx, node, packing.Pods)
})
})
return reconcile.Result{}, multierr.Combine(errs...)
return result.RetryIfError(ctx, multierr.Combine(errs...))
}

func (c *Controller) Register(ctx context.Context, m manager.Manager) error {
Expand Down
1 change: 1 addition & 0 deletions pkg/controllers/allocation/filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ func (f *Filter) GetProvisionablePods(ctx context.Context, provisioner *v1alpha3
}
provisionable = append(provisionable, ptr.Pod(p))
}
logging.FromContext(ctx).Infof("Found %d provisionable pods", len(provisionable))
return provisionable, nil
}

Expand Down
17 changes: 0 additions & 17 deletions pkg/test/expectations/expectations.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,23 +122,6 @@ func ExpectProvisioningSucceeded(ctx context.Context, c client.Client, reconcile
return result
}

// ExpectProvisioningFailed creates every given pod with
// ExpectCreatedWithStatus, runs one reconcile for the provisioner through
// ExpectReconcileFailed, and then returns what ExpectPodExists yields for each
// pod's name and namespace, in the same order as the input.
func ExpectProvisioningFailed(ctx context.Context, c client.Client, reconciler reconcile.Reconciler, provisioner *v1alpha3.Provisioner, pods ...*v1.Pod) []*v1.Pod {
	for i := range pods {
		ExpectCreatedWithStatus(c, pods[i])
	}

	provisionerKey := client.ObjectKeyFromObject(provisioner)
	ExpectReconcileFailed(ctx, reconciler, provisionerKey)

	stored := []*v1.Pod{}
	for i := range pods {
		name, namespace := pods[i].GetName(), pods[i].GetNamespace()
		stored = append(stored, ExpectPodExists(c, name, namespace))
	}
	return stored
}

// ExpectReconcileFailed invokes the reconciler once for key and asserts that
// the call returned an error. The reconcile result itself is discarded.
func ExpectReconcileFailed(ctx context.Context, reconciler reconcile.Reconciler, key client.ObjectKey) {
	request := reconcile.Request{NamespacedName: key}
	_, err := reconciler.Reconcile(ctx, request)
	Expect(err).To(HaveOccurred())
}

func ExpectReconcileSucceeded(ctx context.Context, reconciler reconcile.Reconciler, key client.ObjectKey) {
_, err := reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: key})
Expect(err).ToNot(HaveOccurred())
Expand Down
17 changes: 17 additions & 0 deletions pkg/utils/result/result.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package result

import (
"context"

"go.uber.org/multierr"
"knative.dev/pkg/logging"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
)

// RetryIfError writes err to the context logger and requests a requeue when
// err is non-nil. An error built with multierr is split apart and logged as
// one line per underlying error.
//
// The returned error is always nil: the failure is reported only through the
// log and the Requeue flag of the result.
func RetryIfError(ctx context.Context, err error) (reconcile.Result, error) {
	failures := multierr.Errors(err)
	for i := range failures {
		logging.FromContext(ctx).Errorf("Failed allocation, %s", failures[i].Error())
	}
	if err == nil {
		return reconcile.Result{}, nil
	}
	return reconcile.Result{Requeue: true}, nil
}

0 comments on commit cdf88f2

Please sign in to comment.