Skip to content

Commit

Permalink
Update Levant deployment to inspect the evaluation results.
Browse files Browse the repository at this point in the history
Nomad can return an evaluation which has incurred errors due to
issues such as resource starvation. Previously Levant only
checked whether an actual API error was returned from the
evaluation call which meant errors during this phase were not
caught.

This change implements an evaluation inspector which iterates
through any errors during an evaluation to provide this feedback
to the user.

Closes #39
  • Loading branch information
jrasell committed Nov 20, 2017
1 parent f013954 commit 25788d3
Showing 1 changed file with 50 additions and 0 deletions.
50 changes: 50 additions & 0 deletions levant/deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,15 @@ func (c *nomadClient) Deploy(job *nomad.Job, autoPromote int) (success bool) {
return
}

// Trigger the evaluationInspector to identify any potential errors in the
// Nomad evaluation run. As far as I can tell from testing; a single alloc
// failure in an evaluation means no allocs will be placed so we exit here.
err = c.evaluationInspector(&eval.EvalID)
if err != nil {
logging.Error("levant/deploy: %v", err)
return
}

switch *job.Type {
case nomadStructs.JobTypeService:
logging.Debug("levant/deploy: beginning deployment watcher for job %s", *job.Name)
Expand All @@ -72,6 +81,47 @@ func (c *nomadClient) Deploy(job *nomad.Job, autoPromote int) (success bool) {
return
}

// evaluationInspector polls Nomad for the evaluation identified by evalID
// until it reaches a terminal status (complete, failed or cancelled) and then
// checks whether any task group failed to place allocations.
//
// It returns nil when the evaluation finished without failed task group
// allocations. It returns the API error if the evaluation lookup fails, or a
// descriptive error when one or more task groups could not be placed; in that
// case the details of each failing task group are logged before returning.
func (c *nomadClient) evaluationInspector(evalID *string) error {

	for {
		// Fetch the evaluation on every pass. The status only changes on the
		// Nomad side, so inspecting a single stale copy would never leave the
		// loop if the first lookup returned a non-terminal status.
		evalInfo, _, err := c.nomad.Evaluations().Info(*evalID, nil)
		if err != nil {
			return err
		}

		switch evalInfo.Status {
		case nomadStructs.EvalStatusComplete, nomadStructs.EvalStatusFailed, nomadStructs.EvalStatusCancelled:
			if len(evalInfo.FailedTGAllocs) == 0 {
				logging.Info("levant/deploy: evaluation %s finished successfully", *evalID)
				return nil
			}

			for group, metrics := range evalInfo.FailedTGAllocs {

				// Collect the exhausted classes and dimensions for this task
				// group only, so that each log line describes just the group
				// it names.
				var class, dimension []string

				for exhaustedClass := range metrics.ClassExhausted {
					class = append(class, exhaustedClass)
				}
				for exhaustedDimension := range metrics.DimensionExhausted {
					dimension = append(dimension, exhaustedDimension)
				}

				logging.Error("levant/deploy: task group %s failed to place %v allocs, failed on %v and exhausted %v",
					group, metrics.CoalescedFailures+1, class, dimension)
			}

			return fmt.Errorf("evaluation %v finished with status %s but failed to place allocations",
				*evalID, evalInfo.Status)

		default:
			// The evaluation has not reached a terminal status yet; wait
			// briefly before polling again.
			time.Sleep(1 * time.Second)
		}
	}
}

func (c *nomadClient) deploymentWatcher(evalID string, autoPromote int) (success bool) {

var canaryChan chan interface{}
Expand Down

0 comments on commit 25788d3

Please sign in to comment.