Skip to content

Commit

Permalink
scheduler: ensure dup alloc names are fixed before plan submit. (#18873)
Browse files Browse the repository at this point in the history
This change fixes a bug within the generic scheduler which meant
duplicate alloc indexes (names) could be submitted to the plan
applier and written to state. The bug originates from the
placements calculation notion that names of allocations being
replaced are blindly copied to their replacement. This is not
correct in all cases, particularly when dealing with canaries.

The fix updates the alloc name index tracker to include minor
duplicate tracking. This can be used when computing placements to
ensure duplicate are found, and a new name picked before the plan
is submitted. The name index tracking is now passed from the
reconciler to the generic scheduler via the results, so this does
not have to be regenerated, or another data structure used.
  • Loading branch information
jrasell authored and pkazmierczak committed Oct 30, 2023
1 parent 516b49e commit d1a9093
Show file tree
Hide file tree
Showing 7 changed files with 467 additions and 24 deletions.
3 changes: 3 additions & 0 deletions .changelog/18873.txt
@@ -0,0 +1,3 @@
```release-note:bug
scheduler: Ensure duplicate allocation IDs are tracked and fixed when performing job updates
```
12 changes: 9 additions & 3 deletions nomad/structs/structs.go
Expand Up @@ -10680,13 +10680,19 @@ func (a *Allocation) JobNamespacedID() NamespacedID {
// Index returns the index of the allocation. If the allocation is from a task
// group with count greater than 1, there will be multiple allocations for it.
func (a *Allocation) Index() uint {
l := len(a.Name)
prefix := len(a.JobID) + len(a.TaskGroup) + 2
return AllocIndexFromName(a.Name, a.JobID, a.TaskGroup)
}

// AllocIndexFromName returns the index of an allocation given its name, the
// jobID and the task group name.
func AllocIndexFromName(allocName, jobID, taskGroup string) uint {
l := len(allocName)
prefix := len(jobID) + len(taskGroup) + 2
if l <= 3 || l <= prefix {
return uint(0)
}

strNum := a.Name[prefix : len(a.Name)-1]
strNum := allocName[prefix : len(allocName)-1]
num, _ := strconv.Atoi(strNum)
return uint(num)
}
Expand Down
35 changes: 32 additions & 3 deletions scheduler/generic_sched.go
Expand Up @@ -466,7 +466,7 @@ func (s *GenericScheduler) computeJobAllocs() error {
s.queuedAllocs[p.placeTaskGroup.Name] += 1
destructive = append(destructive, p)
}
return s.computePlacements(destructive, place)
return s.computePlacements(destructive, place, results.taskGroupAllocNameIndexes)
}

// downgradedJobForPlacement returns the job appropriate for non-canary placement replacement
Expand Down Expand Up @@ -508,7 +508,8 @@ func (s *GenericScheduler) downgradedJobForPlacement(p placementResult) (string,

// computePlacements computes placements for allocations. It is given the set of
// destructive updates to place and the set of new placements to place.
func (s *GenericScheduler) computePlacements(destructive, place []placementResult) error {
func (s *GenericScheduler) computePlacements(destructive, place []placementResult, nameIndex map[string]*allocNameIndex) error {

// Get the base nodes
nodes, byDC, err := s.setNodes(s.job)
if err != nil {
Expand All @@ -531,6 +532,12 @@ func (s *GenericScheduler) computePlacements(destructive, place []placementResul
// Get the task group
tg := missing.TaskGroup()

// This is populated from the reconciler via the compute results,
// therefore we cannot have an allocation belonging to a task group
// that has not generated and been through allocation name index
// tracking.
taskGroupNameIndex := nameIndex[tg.Name]

var downgradedJob *structs.Job

if missing.DowngradeNonCanary() {
Expand Down Expand Up @@ -628,12 +635,34 @@ func (s *GenericScheduler) computePlacements(destructive, place []placementResul
resources.Shared.Ports = option.AllocResources.Ports
}

// Pull the allocation name as a new variables, so we can alter
// this as needed without making changes to the original
// object.
newAllocName := missing.Name()

// Identify the index from the name, so we can check this
// against the allocation name index tracking for duplicates.
allocIndex := structs.AllocIndexFromName(newAllocName, s.job.ID, tg.Name)

// If the allocation index is a duplicate, we cannot simply
// create a new allocation with the same name. We need to
// generate a new index and use this. The log message is useful
// for debugging and development, but could be removed in a
// future version of Nomad.
if taskGroupNameIndex.IsDuplicate(allocIndex) {
oldAllocName := newAllocName
newAllocName = taskGroupNameIndex.Next(1)[0]
taskGroupNameIndex.UnsetIndex(allocIndex)
s.logger.Debug("duplicate alloc index found and changed",
"old_alloc_name", oldAllocName, "new_alloc_name", newAllocName)
}

// Create an allocation for this
alloc := &structs.Allocation{
ID: uuid.Generate(),
Namespace: s.job.Namespace,
EvalID: s.eval.ID,
Name: missing.Name(),
Name: newAllocName,
JobID: s.job.ID,
TaskGroup: tg.Name,
Metrics: s.ctx.Metrics(),
Expand Down

0 comments on commit d1a9093

Please sign in to comment.