Skip to content

Commit

Permalink
Merge pull request #77 from gabriel-samfira/fix-instance-retry
Browse files Browse the repository at this point in the history
Bail if we fail to cleanup failed instance
  • Loading branch information
gabriel-samfira committed Feb 7, 2023
2 parents fc1012d + 439eeee commit 226536b
Showing 1 changed file with 12 additions and 1 deletion.
13 changes: 12 additions & 1 deletion runner/pool/pool.go
Original file line number Diff line number Diff line change
Expand Up @@ -888,6 +888,14 @@ func (r *basePoolManager) retryFailedInstancesForOnePool(pool params.Pool) {
// TODO(gabriel-samfira): implement request throttling.
if err := r.deleteInstanceFromProvider(inst); err != nil {
log.Printf("failed to delete instance %s from provider: %s", inst.Name, err)
// Bail here, otherwise we risk creating multiple failing instances, and losing track
// of them. If Create instance failed to return a proper provider ID, we rely on the
// name to delete the instance. If we don't bail here, and end up with multiple
// instances with the same name, using the name to clean up failed instances will fail
// on any subsequent call, unless the external or native provider takes into account
// non unique names and loops over all of them. Something which is extremely hacky and
// which we would rather avoid.
return
}

// TODO(gabriel-samfira): Incrementing CreateAttempt should be done within a transaction.
Expand Down Expand Up @@ -1043,7 +1051,10 @@ func (r *basePoolManager) addPendingInstances() {
// Set the instance to "creating" before launching the goroutine. This will ensure that addPendingInstances()
// won't attempt to create the runner a second time.
if err := r.setInstanceStatus(instance.Name, providerCommon.InstanceCreating, nil); err != nil {
log.Printf("failed to update runner %s status", instance.Name)
log.Printf("failed to update runner %s status: %s", instance.Name, err)
// We failed to transition the instance to Creating. This means that garm will retry to create this instance
// when the loop runs again and we end up with multiple instances.
continue
}
go func(instance params.Instance) {
log.Printf("creating instance %s in pool %s", instance.Name, instance.PoolID)
Expand Down

0 comments on commit 226536b

Please sign in to comment.