Skip to content

Commit

Permalink
Merge pull request #19155 from bdwyertech/chef-exit-codes
Browse files Browse the repository at this point in the history
Chef: Gracefully Handle RFC062 Exit Codes
  • Loading branch information
jbardin committed May 12, 2020
2 parents 2b9cb0e + 2e5fbdf commit e912dc8
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 11 deletions.
101 changes: 96 additions & 5 deletions builtin/provisioners/chef/resource_provisioner.go
Expand Up @@ -15,6 +15,7 @@ import (
"strings"
"sync"
"text/template"
"time"

"github.com/hashicorp/terraform/communicator"
"github.com/hashicorp/terraform/communicator/remote"
Expand Down Expand Up @@ -97,13 +98,15 @@ type provisioner struct {
PolicyName string
HTTPProxy string
HTTPSProxy string
MaxRetries int
NamedRunList string
NOProxy []string
NodeName string
OhaiHints []string
OSType string
RecreateClient bool
PreventSudo bool
RetryOnExitCode map[int]bool
RunList []string
SecretKey string
ServerURL string
Expand All @@ -114,6 +117,7 @@ type provisioner struct {
UserKey string
Vaults map[string][]string
Version string
WaitForRetry time.Duration

cleanupUserKeyCmd string
createConfigFiles provisionFn
Expand Down Expand Up @@ -197,6 +201,11 @@ func Provisioner() terraform.ResourceProvisioner {
Type: schema.TypeString,
Optional: true,
},
"max_retries": &schema.Schema{
Type: schema.TypeInt,
Optional: true,
Default: 0,
},
"no_proxy": &schema.Schema{
Type: schema.TypeList,
Elem: &schema.Schema{Type: schema.TypeString},
Expand All @@ -215,14 +224,19 @@ func Provisioner() terraform.ResourceProvisioner {
Type: schema.TypeString,
Optional: true,
},
"recreate_client": &schema.Schema{
"prevent_sudo": &schema.Schema{
Type: schema.TypeBool,
Optional: true,
},
"prevent_sudo": &schema.Schema{
"recreate_client": &schema.Schema{
Type: schema.TypeBool,
Optional: true,
},
"retry_on_exit_code": &schema.Schema{
Type: schema.TypeList,
Elem: &schema.Schema{Type: schema.TypeInt},
Optional: true,
},
"run_list": &schema.Schema{
Type: schema.TypeList,
Elem: &schema.Schema{Type: schema.TypeString},
Expand Down Expand Up @@ -252,6 +266,11 @@ func Provisioner() terraform.ResourceProvisioner {
Type: schema.TypeString,
Optional: true,
},
"wait_for_retry": &schema.Schema{
Type: schema.TypeInt,
Optional: true,
Default: 30,
},
},

ApplyFunc: applyFn,
Expand Down Expand Up @@ -371,11 +390,55 @@ func applyFn(ctx context.Context) error {
once.Do(cleanupUserKey)

o.Output("Starting initial Chef-Client run...")
if err := p.runChefClient(o, comm); err != nil {
return err

for attempt := 0; attempt <= p.MaxRetries; attempt++ {
// We need a new retry context for each attempt, to make sure
// they all get the correct timeout.
retryCtx, cancel := context.WithTimeout(ctx, comm.Timeout())
defer cancel()

// Make sure to (re)connect before trying to run Chef-Client.
if err := communicator.Retry(retryCtx, func() error {
return comm.Connect(o)
}); err != nil {
return err
}

err = p.runChefClient(o, comm)
if err == nil {
return nil
}

// Allow RFC062 Exit Codes:
// https://github.com/chef/chef-rfc/blob/master/rfc062-exit-status.md
exitError, ok := err.(*remote.ExitError)
if !ok {
return err
}

switch exitError.ExitStatus {
case 35:
o.Output("Reboot has been scheduled in the run state")
err = nil
case 37:
o.Output("Reboot needs to be completed")
err = nil
case 213:
o.Output("Chef has exited during a client upgrade")
err = nil
}

if !p.RetryOnExitCode[exitError.ExitStatus] {
return err
}

if attempt < p.MaxRetries {
o.Output(fmt.Sprintf("Waiting %s before retrying Chef-Client run...", p.WaitForRetry))
time.Sleep(p.WaitForRetry)
}
}

return nil
return err
}

func validateFn(c *terraform.ResourceConfig) (ws []string, es []error) {
Expand Down Expand Up @@ -730,12 +793,14 @@ func decodeConfig(d *schema.ResourceData) (*provisioner, error) {
HTTPProxy: d.Get("http_proxy").(string),
HTTPSProxy: d.Get("https_proxy").(string),
NOProxy: getStringList(d.Get("no_proxy")),
MaxRetries: d.Get("max_retries").(int),
NamedRunList: d.Get("named_run_list").(string),
NodeName: d.Get("node_name").(string),
OhaiHints: getStringList(d.Get("ohai_hints")),
OSType: d.Get("os_type").(string),
RecreateClient: d.Get("recreate_client").(bool),
PreventSudo: d.Get("prevent_sudo").(bool),
RetryOnExitCode: getRetryOnExitCodes(d),
RunList: getStringList(d.Get("run_list")),
SecretKey: d.Get("secret_key").(string),
ServerURL: d.Get("server_url").(string),
Expand All @@ -745,6 +810,7 @@ func decodeConfig(d *schema.ResourceData) (*provisioner, error) {
UserName: d.Get("user_name").(string),
UserKey: d.Get("user_key").(string),
Version: d.Get("version").(string),
WaitForRetry: time.Duration(d.Get("wait_for_retry").(int)) * time.Second,
}

// Make sure the supplied URL has a trailing slash
Expand Down Expand Up @@ -794,6 +860,31 @@ func decodeConfig(d *schema.ResourceData) (*provisioner, error) {
return p, nil
}

// getRetryOnExitCodes builds the set of Chef-Client exit codes on which the
// provisioner should retry, returned as a map keyed by exit code.
//
// If "retry_on_exit_code" is not reported as set by GetOk (or its value is
// nil), the default codes 35, 37 and 213 are used. Otherwise every element of
// the configured list that is an int is added to the set; elements of any
// other type are silently skipped. A value that is not a []interface{} causes
// a panic.
func getRetryOnExitCodes(d *schema.ResourceData) map[int]bool {
	codes := make(map[int]bool)

	raw, isSet := d.GetOk("retry_on_exit_code")
	if !isSet || raw == nil {
		// Nothing configured: fall back to the default exit codes.
		for _, code := range []int{35, 37, 213} {
			codes[code] = true
		}
		return codes
	}

	list, isList := raw.([]interface{})
	if !isList {
		panic(fmt.Sprintf("Unsupported type: %T", raw))
	}

	for _, item := range list {
		code, isInt := item.(int)
		if !isInt {
			continue
		}
		codes[code] = true
	}
	return codes
}

func getStringList(v interface{}) []string {
var result []string

Expand Down
9 changes: 4 additions & 5 deletions communicator/winrm/communicator.go
Expand Up @@ -52,13 +52,12 @@ func New(s *terraform.InstanceState) (*Communicator, error) {

// Connect implementation of communicator.Communicator interface
func (c *Communicator) Connect(o terraform.UIOutput) error {
if c.client != nil {
return nil
}
// Set the client to nil since we'll (re)create it
c.client = nil

params := winrm.DefaultParameters
params.Timeout = formatDuration(c.Timeout())
if c.connInfo.NTLM == true {
if c.connInfo.NTLM {
params.TransportDecorator = func() winrm.Transporter { return &winrm.ClientNTLM{} }
}

Expand Down Expand Up @@ -189,7 +188,7 @@ func (c *Communicator) newCopyClient() (*winrmcp.Winrmcp, error) {
MaxOperationsPerShell: 15, // lowest common denominator
}

if c.connInfo.NTLM == true {
if c.connInfo.NTLM {
config.TransportDecorator = func() winrm.Transporter { return &winrm.ClientNTLM{} }
}

Expand Down
14 changes: 13 additions & 1 deletion website/docs/provisioners/chef.html.markdown
Expand Up @@ -57,7 +57,7 @@ resource "aws_instance" "web" {
recreate_client = true
user_name = "bork"
user_key = "${file("../bork.pem")}"
version = "12.4.1"
version = "15.10.13"
# If you have a self signed cert on your chef server change this to :verify_none
ssl_verify_mode = ":verify_peer"
}
Expand Down Expand Up @@ -109,6 +109,9 @@ The following arguments are supported:

* `https_proxy (string)` - (Optional) The proxy server for Chef Client HTTPS connections.

* `max_retries (integer)` - (Optional) The number of times to retry the provisioning process
after receiving an exit code in the `retry_on_exit_code` list. Defaults to `0`.

* `named_run_list (string)` - (Optional) The name of an alternate run-list to invoke during the
initial Chef Client run. The run-list must already exist in the Policyfile that defines
`policy_name`. Only applies when `use_policyfile` is `true`.
Expand All @@ -131,6 +134,11 @@ The following arguments are supported:
* `recreate_client (boolean)` - (Optional) If `true`, first delete any existing Chef Node and
Client before registering the new Chef Client.

* `retry_on_exit_code (array)` - (Optional) The exit codes upon which Terraform should
gracefully retry the provisioning process. Intended for use with
[Chef RFC062 codes](https://github.com/chef-boneyard/chef-rfc/blob/master/rfc062-exit-status.md).
(Defaults to `[35, 37, 213]`)

* `run_list (array)` - (Optional) A list with recipes that will be invoked during the initial
Chef Client run. The run-list will also be saved to the Chef Server after a successful
initial run. Required if `use_policyfile` is `false`; ignored when `use_policyfile` is `true`
Expand Down Expand Up @@ -169,3 +177,7 @@ The following arguments are supported:

* `version (string)` - (Optional) The Chef Client version to install on the remote machine.
If not set, the latest available version will be installed.

* `wait_for_retry (integer)` - (Optional) Amount of time in seconds to wait before
retrying the provisioning process after receiving an exit code in the `retry_on_exit_code`
list. Defaults to `30`.

0 comments on commit e912dc8

Please sign in to comment.