Skip to content

Commit

Permalink
Add basic status checking for batch job types.
Browse files Browse the repository at this point in the history
The Nomad batch job type does not support the deployment model, which
meant that previously Levant would simply exit after the job was
registered. This update adds additional functionality so that
Levant will at least confirm the job has reached a status of running
before exiting.

Closes #53
  • Loading branch information
jrasell committed Dec 27, 2017
1 parent 054878c commit 2ba1bc0
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 3 deletions.
5 changes: 2 additions & 3 deletions levant/deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,8 @@ func (c *nomadClient) Deploy(job *nomad.Job, autoPromote int, forceCount bool) (
}

// GH-50: batch job types do not return an evaluation upon registration.
if eval == nil && *job.Type == nomadStructs.JobTypeBatch {
logging.Debug("levant/deploy: job type %s does not create evaluations", nomadStructs.JobTypeBatch)
return true
if eval.EvalID == "" && *job.Type == nomadStructs.JobTypeBatch {
return c.checkBatchJob(job.Name)
}

// Trigger the evaluationInspector to identify any potential errors in the
Expand Down
53 changes: 53 additions & 0 deletions levant/job_status_checker.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package levant

import (
"time"

nomad "github.com/hashicorp/nomad/api"
nomadStructs "github.com/hashicorp/nomad/nomad/structs"
"github.com/jrasell/levant/logging"
)

// checkBatchJob waits for the named batch job to reach a status of running.
// This is required as currently Nomad does not support deployments of job
// type batch, so the job status is the only signal available.
//
// The job is queried using Nomad blocking queries: each call passes the index
// returned by the previous call so that it only needs to return once the job
// has changed or the wait time has elapsed. The whole check is bounded by a
// five minute deadline.
//
// It returns true as soon as the job reports a running status. It returns
// false if jobName is nil, if the query fails or returns an empty response,
// or if the deadline passes before the job is seen running.
//
// NOTE(review): a batch job which starts and finishes between two queries
// would never be observed as running here - confirm whether other statuses
// should also be treated as success.
func (c *nomadClient) checkBatchJob(jobName *string) bool {

	// Without a job name there is nothing to query; fail rather than panic on
	// the dereferences below.
	if jobName == nil {
		logging.Error("levant/job_status_checker: unable to check batch job status as no job name was supplied")
		return false
	}

	// A fixed deadline is used rather than time.Tick, as the ticker created by
	// time.Tick can never be stopped and we only need a single expiry.
	const timeout = 5 * time.Minute
	deadline := time.Now().Add(timeout)

	// Setup the Nomad QueryOptions to allow blocking queries. WaitIndex starts
	// at zero so the first query returns immediately with the current state.
	q := &nomad.QueryOptions{}

	for {

		// Check the deadline before every query so that no code path can loop
		// without eventually timing out.
		remaining := time.Until(deadline)
		if remaining <= 0 {
			logging.Error("levant/job_status_checker: timeout reached while verifying the status of batch job %s",
				*jobName)
			return false
		}

		// Bound the blocking query by the time we have left, otherwise a single
		// call could block well beyond the overall deadline.
		q.WaitTime = remaining

		job, meta, err := c.nomad.Jobs().Info(*jobName, q)
		if err != nil {
			logging.Error("levant/job_status_checker: unable to query batch job %s: %v", *jobName, err)
			return false
		}

		// Defensive: a nil job or metadata cannot be inspected any further.
		if job == nil || meta == nil {
			logging.Error("levant/job_status_checker: empty response received when querying batch job %s", *jobName)
			return false
		}

		// Status is a pointer in the Nomad API job struct; treat an unset status
		// as "not yet running" instead of dereferencing nil.
		var status string
		if job.Status != nil {
			status = *job.Status
		}

		if status == nomadStructs.JobStatusRunning {
			logging.Info("levant/job_status_checker: batch job %s has status %s", *jobName, status)
			return true
		}

		logging.Debug("levant/job_status_checker: batch job %s currently has status %s", *jobName, status)

		// Block the next query until the job changes past the index we have
		// just seen, or the wait time elapses.
		q.WaitIndex = meta.LastIndex
	}
}

0 comments on commit 2ba1bc0

Please sign in to comment.