From 25788d3ba6a4930897641f191dd79aefcbe1bd3c Mon Sep 17 00:00:00 2001 From: James Rasell Date: Mon, 20 Nov 2017 11:41:14 +0000 Subject: [PATCH] Update Levant deployment to inspect the evaluation results. Nomad can return an evaluation which has incurred errors due to issues such as resource starvation. Previously Levant only checked whether an actual API error was returned from the evaluation call which meant errors during this phase were not caught. This change implements an evaluation inspector which iterates through any errors during an evaluation to provide this feedback to the user. Closes #39 --- levant/deploy.go | 50 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/levant/deploy.go b/levant/deploy.go index f88aadda0..c5902c582 100644 --- a/levant/deploy.go +++ b/levant/deploy.go @@ -60,6 +60,15 @@ func (c *nomadClient) Deploy(job *nomad.Job, autoPromote int) (success bool) { return } + // Trigger the evaluationInspector to identify any potential errors in the + // Nomad evaluation run. As far as I can tell from testing; a single alloc + // failure in an evaluation means no allocs will be placed so we exit here. 
+	err = c.evaluationInspector(&eval.EvalID)
+	if err != nil {
+		logging.Error("levant/deploy: %v", err)
+		return
+	}
+
 	switch *job.Type {
 	case nomadStructs.JobTypeService:
 		logging.Debug("levant/deploy: beginning deployment watcher for job %s", *job.Name)
@@ -72,6 +81,57 @@ func (c *nomadClient) Deploy(job *nomad.Job, autoPromote int) (success bool) {
 	return
 }
 
+// evaluationInspector polls the Nomad evaluation identified by evalID once a
+// second until its status is complete, failed or cancelled. It returns nil
+// when the finished evaluation has no failed task group allocations.
+// Otherwise it logs the exhausted classes and dimensions for each failed task
+// group and returns an error. An error from the Nomad API call is returned
+// unchanged.
+func (c *nomadClient) evaluationInspector(evalID *string) error {
+
+	for {
+		// Fetch the evaluation on every pass; a single snapshot taken before the
+		// loop would never leave a non-finished status and the loop would spin
+		// forever.
+		evalInfo, _, err := c.nomad.Evaluations().Info(*evalID, nil)
+		if err != nil {
+			return err
+		}
+
+		switch evalInfo.Status {
+		case nomadStructs.EvalStatusComplete, nomadStructs.EvalStatusFailed, nomadStructs.EvalStatusCancelled:
+			if len(evalInfo.FailedTGAllocs) == 0 {
+				logging.Info("levant/deploy: evaluation %s finished successfully", *evalID)
+				return nil
+			}
+
+			for group, metrics := range evalInfo.FailedTGAllocs {
+
+				// Collect the failures per task group so one group's log line does
+				// not include entries exhausted by a previously logged group.
+				var class, dimension []string
+
+				for name := range metrics.ClassExhausted {
+					class = append(class, name)
+				}
+				for name := range metrics.DimensionExhausted {
+					dimension = append(dimension, name)
+				}
+
+				logging.Error("levant/deploy: task group %s failed to place %v allocs, failed on %v and exhausted %v",
+					group, metrics.CoalescedFailures+1, class, dimension)
+			}
+
+			return fmt.Errorf("evaluation %v finished with status %s but failed to place allocations",
+				*evalID, evalInfo.Status)
+
+		default:
+			// The evaluation has not finished yet; wait before polling again.
+			time.Sleep(1 * time.Second)
+		}
+	}
+}
+
 func (c *nomadClient) deploymentWatcher(evalID string, autoPromote int) (success bool) {
 
 	var canaryChan chan interface{}