Skip to content

Commit

Permalink
fix: segfault on auto-revert with namespace
Browse files Browse the repository at this point in the history
  • Loading branch information
cyrilgdn committed Dec 31, 2022
1 parent 4cc7f75 commit 4fe9511
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 17 deletions.
27 changes: 14 additions & 13 deletions levant/auto_revert.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,48 +3,48 @@ package levant
import (
"time"

"github.com/hashicorp/nomad/api"
nomad "github.com/hashicorp/nomad/api"
"github.com/rs/zerolog/log"
)

func (l *levantDeployment) autoRevert(jobID, depID *string) {
func (l *levantDeployment) autoRevert(dep *nomad.Deployment) {

// Setup a loop in order to retry a race condition whereby Levant may query
// the latest deployment (auto-revert dep) before it has been started.
i := 0
for i := 0; i < 5; i++ {

dep, _, err := l.nomad.Jobs().LatestDeployment(*jobID, nil)
revertDep, _, err := l.nomad.Jobs().LatestDeployment(dep.JobID, &api.QueryOptions{Namespace: dep.Namespace})
if err != nil {
log.Error().Msgf("levant/auto_revert: unable to query latest deployment of job %s", *jobID)
log.Error().Msgf("levant/auto_revert: unable to query latest deployment of job %s", dep.JobID)
return
}

// Check whether we have got the original deployment ID as a return from
// Nomad, and if so, continue the loop to try again.
if dep.ID == *depID {
log.Debug().Msgf("levant/auto_revert: auto-revert deployment not triggered for job %s, rechecking", *jobID)
if revertDep.ID == dep.ID {
log.Debug().Msgf("levant/auto_revert: auto-revert deployment not triggered for job %s, rechecking", dep.JobID)
time.Sleep(1 * time.Second)
continue
}

log.Info().Msgf("levant/auto_revert: beginning deployment watcher for job %s", *jobID)
success := l.deploymentWatcher(dep.ID)
log.Info().Msgf("levant/auto_revert: beginning deployment watcher for job %s", dep.JobID)
success := l.deploymentWatcher(revertDep.ID)

if success {
log.Info().Msgf("levant/auto_revert: auto-revert of job %s was successful", *jobID)
log.Info().Msgf("levant/auto_revert: auto-revert of job %s was successful", dep.JobID)
break
} else {
log.Error().Msgf("levant/auto_revert: auto-revert of job %s failed; POTENTIAL OUTAGE SITUATION", *jobID)
l.checkFailedDeployment(&dep.ID)
log.Error().Msgf("levant/auto_revert: auto-revert of job %s failed; POTENTIAL OUTAGE SITUATION", dep.JobID)
l.checkFailedDeployment(&revertDep.ID)
break
}
}

// At this point we have not been able to get the latest deploymentID that
// is different from the original so we can't perform auto-revert checking.
if i == 5 {
log.Error().Msgf("levant/auto_revert: unable to check auto-revert of job %s", *jobID)
log.Error().Msgf("levant/auto_revert: unable to check auto-revert of job %s", dep.JobID)
}
}

Expand All @@ -60,6 +60,7 @@ func (l *levantDeployment) checkAutoRevert(dep *nomad.Deployment) {
for _, v := range dep.TaskGroups {
if v.AutoRevert {
revert = true
break
}
}

Expand All @@ -68,7 +69,7 @@ func (l *levantDeployment) checkAutoRevert(dep *nomad.Deployment) {
dep.JobID)

// Run the levant autoRevert function.
l.autoRevert(&dep.JobID, &dep.ID)
l.autoRevert(dep)
} else {
log.Info().Msgf("levant/auto_revert: job %v is not in auto-revert; POTENTIAL OUTAGE SITUATION", dep.JobID)
}
Expand Down
18 changes: 14 additions & 4 deletions test/acctest/acctest.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ type TestCase struct {
// Steps are ran in order stopping on failure
Steps []TestStep

// SetupFunc is called at before the steps
SetupFunc TestStateFunc

// CleanupFunc is called at the end of the TestCase
CleanupFunc TestStateFunc
}
Expand Down Expand Up @@ -42,8 +45,9 @@ type TestStateFunc func(*TestState) error

// TestState is the configuration for the TestCase
type TestState struct {
JobName string
Nomad *nomad.Client
JobName string
Namespace string
Nomad *nomad.Client
}

// Test executes a single TestCase
Expand All @@ -62,6 +66,12 @@ func Test(t *testing.T, c TestCase) {
Nomad: nomad,
}

if c.SetupFunc != nil {
if err := c.SetupFunc(state); err != nil {
t.Errorf("setup failed: %s", err)
}
}

for i, step := range c.Steps {
stepNum := i + 1

Expand Down Expand Up @@ -102,15 +112,15 @@ func Test(t *testing.T, c TestCase) {

// CleanupPurgeJob is a cleanup func to purge the TestCase job from Nomad
func CleanupPurgeJob(s *TestState) error {
_, _, err := s.Nomad.Jobs().Deregister(s.JobName, true, nil)
_, _, err := s.Nomad.Jobs().Deregister(s.JobName, true, &nomad.WriteOptions{Namespace: s.Namespace})
return err
}

// CheckDeploymentStatus is a TestStateFunc to check if the latest deployment of
// the TestCase job matches the desired status
func CheckDeploymentStatus(status string) TestStateFunc {
return func(s *TestState) error {
deploy, _, err := s.Nomad.Jobs().LatestDeployment(s.JobName, nil)
deploy, _, err := s.Nomad.Jobs().LatestDeployment(s.JobName, &nomad.QueryOptions{Namespace: s.Namespace})
if err != nil {
return err
}
Expand Down
4 changes: 4 additions & 0 deletions test/acctest/deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ func (c DeployTestStepRunner) Run(s *TestState) error {
return fmt.Errorf("error rendering template: %s", err)
}

if job.Namespace != nil {
s.Namespace = *job.Namespace
}

cfg := &levant.DeployConfig{
Deploy: &structs.DeployConfig{
Canary: c.Canary,
Expand Down
31 changes: 31 additions & 0 deletions test/deploy_test.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package test

import (
"fmt"
"testing"

"github.com/hashicorp/levant/test/acctest"
"github.com/hashicorp/nomad/api"
)

func TestDeploy_basic(t *testing.T) {
Expand All @@ -20,6 +22,35 @@ func TestDeploy_basic(t *testing.T) {
})
}

func TestDeploy_namespace(t *testing.T) {
acctest.Test(t, acctest.TestCase{
SetupFunc: func(s *acctest.TestState) error {
if _, err := s.Nomad.Namespaces().Register(&api.Namespace{Name: "test"}, nil); err != nil {
return fmt.Errorf("could not create test namespace: %w", err)
}
return nil
},
Steps: []acctest.TestStep{
{
Runner: acctest.DeployTestStepRunner{
FixtureName: "deploy_namespace.nomad",
},
Check: acctest.CheckDeploymentStatus("successful"),
},
},
CleanupFunc: func(s *acctest.TestState) error {
if err := acctest.CleanupPurgeJob(s); err != nil {
return err
}

if _, err := s.Nomad.Namespaces().Delete("test", nil); err != nil {
return fmt.Errorf("could not delete namespace test: %w", err)
}
return nil
},
})
}

func TestDeploy_driverError(t *testing.T) {
acctest.Test(t, acctest.TestCase{
Steps: []acctest.TestStep{
Expand Down
43 changes: 43 additions & 0 deletions test/fixtures/deploy_namespace.nomad
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# tests a healthy deployment

job "[[.job_name]]" {
datacenters = ["dc1"]
type = "service"
namespace = "test"
update {
max_parallel = 1
min_healthy_time = "10s"
healthy_deadline = "1m"
auto_revert = true
}

group "test" {
count = 1
restart {
attempts = 10
interval = "5m"
delay = "25s"
mode = "delay"
}
ephemeral_disk {
size = 300
}
task "alpine" {
driver = "docker"
config {
image = "alpine"
command = "tail"
args = [
"-f", "/dev/null"
]
}
resources {
cpu = 100
memory = 128
network {
mbits = 10
}
}
}
}
}

0 comments on commit 4fe9511

Please sign in to comment.