From d1a909380816297b129eabc4cfa1784f48359fae Mon Sep 17 00:00:00 2001
From: James Rasell
Date: Fri, 27 Oct 2023 14:16:41 +0100
Subject: [PATCH] scheduler: ensure dup alloc names are fixed before plan submit. (#18873)

This change fixes a bug within the generic scheduler which meant duplicate
alloc indexes (names) could be submitted to the plan applier and written to
state.

The bug originates in the placement calculation's assumption that the names of
allocations being replaced can be blindly copied to their replacements. This
is not correct in all cases, particularly when dealing with canaries.

The fix updates the alloc name index tracker to include lightweight duplicate
tracking. This is used when computing placements to ensure duplicates are
detected and a new name is picked before the plan is submitted. The name index
tracking is now passed from the reconciler to the generic scheduler via the
reconcile results, so it does not have to be regenerated and no additional
data structure is needed.
---
 .changelog/18873.txt             |   3 +
 nomad/structs/structs.go         |  12 +-
 scheduler/generic_sched.go       |  35 +++-
 scheduler/generic_sched_test.go  | 299 +++++++++++++++++++++++++++++++
 scheduler/reconcile.go           |  26 ++-
 scheduler/reconcile_util.go      |  65 +++++--
 scheduler/reconcile_util_test.go |  51 +++++-
 7 files changed, 467 insertions(+), 24 deletions(-)
 create mode 100644 .changelog/18873.txt

diff --git a/.changelog/18873.txt b/.changelog/18873.txt
new file mode 100644
index 000000000000..11ef1fc96605
--- /dev/null
+++ b/.changelog/18873.txt
@@ -0,0 +1,3 @@
+```release-note:bug
+scheduler: Ensure duplicate allocation IDs are tracked and fixed when performing job updates
+```
diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go
index 4a2052c0aa13..698f0912f5db 100644
--- a/nomad/structs/structs.go
+++ b/nomad/structs/structs.go
@@ -10680,13 +10680,19 @@ func (a *Allocation) JobNamespacedID() NamespacedID {
 // Index returns the index of the allocation. If the allocation is from a task
 // group with count greater than 1, there will be multiple allocations for it.
 func (a *Allocation) Index() uint {
-	l := len(a.Name)
-	prefix := len(a.JobID) + len(a.TaskGroup) + 2
+	return AllocIndexFromName(a.Name, a.JobID, a.TaskGroup)
+}
+
+// AllocIndexFromName returns the index of an allocation given its name, the
+// jobID and the task group name.
+func AllocIndexFromName(allocName, jobID, taskGroup string) uint {
+	l := len(allocName)
+	prefix := len(jobID) + len(taskGroup) + 2
 	if l <= 3 || l <= prefix {
 		return uint(0)
 	}
 
-	strNum := a.Name[prefix : len(a.Name)-1]
+	strNum := allocName[prefix : len(allocName)-1]
 	num, _ := strconv.Atoi(strNum)
 	return uint(num)
 }
diff --git a/scheduler/generic_sched.go b/scheduler/generic_sched.go
index 3314614c2c9e..819845f78baf 100644
--- a/scheduler/generic_sched.go
+++ b/scheduler/generic_sched.go
@@ -466,7 +466,7 @@ func (s *GenericScheduler) computeJobAllocs() error {
 		s.queuedAllocs[p.placeTaskGroup.Name] += 1
 		destructive = append(destructive, p)
 	}
-	return s.computePlacements(destructive, place)
+	return s.computePlacements(destructive, place, results.taskGroupAllocNameIndexes)
 }
 
 // downgradedJobForPlacement returns the job appropriate for non-canary placement replacement
@@ -508,7 +508,8 @@ func (s *GenericScheduler) downgradedJobForPlacement(p placementResult) (string,
 
 // computePlacements computes placements for allocations. It is given the set of
 // destructive updates to place and the set of new placements to place.
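As an aside before the `computePlacements` changes below: allocation names follow the `<jobID>.<taskGroup>[<index>]` convention (for example `example.cache[3]`), which is what lets the new `AllocIndexFromName` helper above recover the index from a name alone. A minimal, standalone sketch of that parsing, assuming only the naming convention shown in this patch (this is not Nomad's actual code):

```go
package main

import (
	"fmt"
	"strconv"
)

// allocIndexFromName mirrors the helper added above: names look like
// "<jobID>.<taskGroup>[<index>]", so the digits start after
// len(jobID)+len(taskGroup)+2 bytes (the "." and the "[") and stop just
// before the trailing "]".
func allocIndexFromName(allocName, jobID, taskGroup string) uint {
	prefix := len(jobID) + len(taskGroup) + 2
	if len(allocName) <= 3 || len(allocName) <= prefix {
		return 0
	}
	num, _ := strconv.Atoi(allocName[prefix : len(allocName)-1])
	return uint(num)
}

func main() {
	// "example" (7) + "cache" (5) + 2 = 14 bytes of prefix, leaving "3".
	fmt.Println(allocIndexFromName("example.cache[3]", "example", "cache")) // prints 3
}
```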
-func (s *GenericScheduler) computePlacements(destructive, place []placementResult) error {
+func (s *GenericScheduler) computePlacements(destructive, place []placementResult, nameIndex map[string]*allocNameIndex) error {
+
 	// Get the base nodes
 	nodes, byDC, err := s.setNodes(s.job)
 	if err != nil {
@@ -531,6 +532,12 @@ func (s *GenericScheduler) computePlacements(destructive, place []placementResul
 		// Get the task group
 		tg := missing.TaskGroup()
 
+		// This is populated from the reconciler via the compute results,
+		// therefore we cannot have an allocation belonging to a task group
+		// that has not already had an allocation name index generated and
+		// tracked for it.
+		taskGroupNameIndex := nameIndex[tg.Name]
+
 		var downgradedJob *structs.Job
 
 		if missing.DowngradeNonCanary() {
@@ -628,12 +635,34 @@ func (s *GenericScheduler) computePlacements(destructive, place []placementResul
 				resources.Shared.Ports = option.AllocResources.Ports
 			}
 
+			// Pull the allocation name as a new variable, so we can alter
+			// this as needed without making changes to the original
+			// object.
+			newAllocName := missing.Name()
+
+			// Identify the index from the name, so we can check this
+			// against the allocation name index tracking for duplicates.
+			allocIndex := structs.AllocIndexFromName(newAllocName, s.job.ID, tg.Name)
+
+			// If the allocation index is a duplicate, we cannot simply
+			// create a new allocation with the same name. We need to
+			// generate a new index and use this. The log message is useful
+			// for debugging and development, but could be removed in a
+			// future version of Nomad.
+			if taskGroupNameIndex.IsDuplicate(allocIndex) {
+				oldAllocName := newAllocName
+				newAllocName = taskGroupNameIndex.Next(1)[0]
+				taskGroupNameIndex.UnsetIndex(allocIndex)
+				s.logger.Debug("duplicate alloc index found and changed",
+					"old_alloc_name", oldAllocName, "new_alloc_name", newAllocName)
+			}
+
 			// Create an allocation for this
 			alloc := &structs.Allocation{
 				ID:        uuid.Generate(),
 				Namespace: s.job.Namespace,
 				EvalID:    s.eval.ID,
-				Name:      missing.Name(),
+				Name:      newAllocName,
 				JobID:     s.job.ID,
 				TaskGroup: tg.Name,
 				Metrics:   s.ctx.Metrics(),
diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go
index 802d240ec7b2..edbca23ac3f4 100644
--- a/scheduler/generic_sched_test.go
+++ b/scheduler/generic_sched_test.go
@@ -1970,6 +1970,305 @@ func TestServiceSched_JobModify(t *testing.T) {
 	h.AssertEvalStatus(t, structs.EvalStatusComplete)
 }
 
+func TestServiceSched_JobModify_ExistingDuplicateAllocIndex(t *testing.T) {
+	ci.Parallel(t)
+
+	testHarness := NewHarness(t)
+
+	// Create some nodes
+	var nodes []*structs.Node
+	for i := 0; i < 10; i++ {
+		node := mock.Node()
+		nodes = append(nodes, node)
+		must.NoError(t, testHarness.State.UpsertNode(structs.MsgTypeTestSetup, testHarness.NextIndex(), node))
+	}
+
+	// Generate a fake job with allocations
+	mockJob := mock.Job()
+	must.NoError(t, testHarness.State.UpsertJob(structs.MsgTypeTestSetup, testHarness.NextIndex(), nil, mockJob))
+
+	// Generate some allocations which will represent our pre-existing
+	// allocations. These have aggressive duplicate names.
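Before the test body continues below, the essence of the `computePlacements` change above can be condensed into a small sketch; the `nameTracker` interface is a hypothetical stand-in for `*allocNameIndex`, exposing only the three methods the scheduler uses here:

```go
package sketch

// nameTracker is a hypothetical stand-in for the scheduler's *allocNameIndex,
// exposing only the methods the new computePlacements logic relies on.
type nameTracker interface {
	IsDuplicate(idx uint) bool
	Next(n uint) []string
	UnsetIndex(idx uint)
}

// pickAllocName condenses the rename step shown above: keep the name copied
// from the allocation being replaced unless its index is a known duplicate,
// in which case claim the next free name and drop the duplicate entry so
// later placements see the corrected state.
func pickAllocName(tracker nameTracker, proposedName string, allocIndex uint) string {
	if !tracker.IsDuplicate(allocIndex) {
		return proposedName
	}
	newName := tracker.Next(1)[0]
	tracker.UnsetIndex(allocIndex)
	return newName
}
```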
+	var allocs []*structs.Allocation
+	for i := 0; i < 10; i++ {
+		alloc := mock.Alloc()
+		alloc.Job = mockJob
+		alloc.JobID = mockJob.ID
+		alloc.NodeID = nodes[i].ID
+
+		alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
+
+		if i%2 == 0 {
+			alloc.Name = "my-job.web[0]"
+		}
+		allocs = append(allocs, alloc)
+	}
+	must.NoError(t, testHarness.State.UpsertAllocs(structs.MsgTypeTestSetup, testHarness.NextIndex(), allocs))
+
+	// Generate a job modification which will force a destructive update.
+	mockJob2 := mock.Job()
+	mockJob2.ID = mockJob.ID
+	mockJob2.TaskGroups[0].Tasks[0].Config["command"] = "/bin/other"
+	must.NoError(t, testHarness.State.UpsertJob(structs.MsgTypeTestSetup, testHarness.NextIndex(), nil, mockJob2))
+
+	// Create a mock evaluation which represents work to reconcile the job
+	// update.
+	eval := &structs.Evaluation{
+		Namespace:   structs.DefaultNamespace,
+		ID:          uuid.Generate(),
+		Priority:    50,
+		TriggeredBy: structs.EvalTriggerJobRegister,
+		JobID:       mockJob2.ID,
+		Status:      structs.EvalStatusPending,
+	}
+	must.NoError(t, testHarness.State.UpsertEvals(structs.MsgTypeTestSetup, testHarness.NextIndex(), []*structs.Evaluation{eval}))
+
+	// Process the evaluation and ensure we get a single plan as a result.
+	must.NoError(t, testHarness.Process(NewServiceScheduler, eval))
+	must.Len(t, 1, testHarness.Plans)
+
+	// Iterate and track the node allocations to ensure we have the correct
+	// amount, and that there are now no duplicate names.
+	totalNodeAllocations := 0
+	allocIndexNames := make(map[string]int)
+
+	for _, planNodeAlloc := range testHarness.Plans[0].NodeAllocation {
+		for _, nodeAlloc := range planNodeAlloc {
+			totalNodeAllocations++
+			allocIndexNames[nodeAlloc.Name]++
+
+			if val, ok := allocIndexNames[nodeAlloc.Name]; ok && val > 1 {
+				t.Fatalf("found duplicate alloc name %q", nodeAlloc.Name)
+			}
+		}
+	}
+	must.Eq(t, 10, totalNodeAllocations)
+
+	testHarness.AssertEvalStatus(t, structs.EvalStatusComplete)
+}
+
+func TestServiceSched_JobModify_ProposedDuplicateAllocIndex(t *testing.T) {
+	ci.Parallel(t)
+
+	testHarness := NewHarness(t)
+
+	// Create some nodes
+	var nodes []*structs.Node
+	for i := 0; i < 10; i++ {
+		node := mock.Node()
+		nodes = append(nodes, node)
+		must.NoError(t, testHarness.State.UpsertNode(structs.MsgTypeTestSetup, testHarness.NextIndex(), node))
+	}
+
+	// Generate a job which includes a canary update strategy.
+	mockJob := mock.MinJob()
+	mockJob.TaskGroups[0].Count = 3
+	mockJob.Update = structs.UpdateStrategy{
+		Canary:      1,
+		MaxParallel: 3,
+	}
+	must.NoError(t, testHarness.State.UpsertJob(structs.MsgTypeTestSetup, testHarness.NextIndex(), nil, mockJob))
+
+	// Generate some allocations which will represent our pre-existing
+	// allocations.
+	var allocs []*structs.Allocation
+	for i := 0; i < 3; i++ {
+		alloc := mock.MinAlloc()
+		alloc.Namespace = structs.DefaultNamespace
+		alloc.Job = mockJob
+		alloc.JobID = mockJob.ID
+		alloc.NodeID = nodes[i].ID
+		alloc.Name = structs.AllocName(mockJob.ID, mockJob.TaskGroups[0].Name, uint(i))
+		allocs = append(allocs, alloc)
+	}
+	must.NoError(t, testHarness.State.UpsertAllocs(structs.MsgTypeTestSetup, testHarness.NextIndex(), allocs))
+
+	// Generate a job modification which will force a destructive update as
+	// well as a scale-up.
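The fixtures above and below lean on the `<jobID>.<taskGroup>[<index>]` naming convention, both via `structs.AllocName` and via the hand-written `my-job.web[0]` duplicates earlier. A hedged sketch of how such fixtures could be fabricated; the helpers are illustrative, not Nomad's own:

```go
package sketch

import "fmt"

// allocName mirrors the "<jobID>.<taskGroup>[<index>]" convention the tests
// rely on, e.g. "my-job.web[0]". Illustrative only; Nomad's structs.AllocName
// is the real source of truth.
func allocName(jobID, taskGroup string, index uint) string {
	return fmt.Sprintf("%s.%s[%d]", jobID, taskGroup, index)
}

// duplicateEveryOther reproduces the "aggressive duplicate names" pattern used
// in the first test above: every even entry collides on index 0.
func duplicateEveryOther(jobID, taskGroup string, count int) []string {
	names := make([]string, 0, count)
	for i := 0; i < count; i++ {
		idx := uint(i)
		if i%2 == 0 {
			idx = 0
		}
		names = append(names, allocName(jobID, taskGroup, idx))
	}
	return names
}
```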
+	mockJob2 := mockJob.Copy()
+	mockJob2.Version++
+	mockJob2.TaskGroups[0].Tasks[0].Config["command"] = "/bin/other"
+	mockJob2.TaskGroups[0].Count++
+	must.NoError(t, testHarness.State.UpsertJob(structs.MsgTypeTestSetup, testHarness.NextIndex(), nil, mockJob2))
+
+	nextRaftIndex := testHarness.NextIndex()
+	deploymentID := uuid.Generate()
+
+	// Upsert a canary into state; this represents the first stage of the
+	// deployment process and jumps us to the point where duplicate allocation
+	// indexes could be produced.
+	canaryAlloc := mock.MinAlloc()
+	canaryAlloc.Namespace = structs.DefaultNamespace
+	canaryAlloc.Job = mockJob2
+	canaryAlloc.JobID = mockJob2.ID
+	canaryAlloc.NodeID = nodes[1].ID
+	canaryAlloc.Name = structs.AllocName(mockJob2.ID, mockJob2.TaskGroups[0].Name, uint(0))
+	canaryAlloc.DeploymentID = deploymentID
+	canaryAlloc.ClientStatus = structs.AllocClientStatusRunning
+	must.NoError(t, testHarness.State.UpsertAllocs(structs.MsgTypeTestSetup, nextRaftIndex, []*structs.Allocation{
+		canaryAlloc,
+	}))
+
+	// Craft our deployment object which represents the post-canary state. This
+	// unblocks the rest of the deployment process, where we replace the old
+	// job version allocations.
+	canaryDeployment := structs.Deployment{
+		ID:         deploymentID,
+		Namespace:  mockJob2.Namespace,
+		JobID:      mockJob2.ID,
+		JobVersion: mockJob2.Version,
+		TaskGroups: map[string]*structs.DeploymentState{
+			mockJob2.TaskGroups[0].Name: {
+				Promoted:       true,
+				DesiredTotal:   4,
+				HealthyAllocs:  1,
+				PlacedAllocs:   1,
+				PlacedCanaries: []string{canaryAlloc.ID},
+			},
+		},
+		Status:            structs.DeploymentStatusRunning,
+		StatusDescription: structs.DeploymentStatusDescriptionRunning,
+		EvalPriority:      50,
+		JobCreateIndex:    mockJob2.CreateIndex,
+	}
+	must.NoError(t, testHarness.State.UpsertDeployment(nextRaftIndex, &canaryDeployment))
+
+	// Create a mock evaluation which represents work to reconcile the job
+	// update.
+	eval := &structs.Evaluation{
+		Namespace:    structs.DefaultNamespace,
+		ID:           uuid.Generate(),
+		Priority:     50,
+		TriggeredBy:  structs.EvalTriggerJobRegister,
+		JobID:        mockJob2.ID,
+		Status:       structs.EvalStatusPending,
+		DeploymentID: deploymentID,
+	}
+	must.NoError(t, testHarness.State.UpsertEvals(structs.MsgTypeTestSetup, testHarness.NextIndex(), []*structs.Evaluation{eval}))
+
+	// Process the evaluation and ensure we get a single plan as a result.
+	must.NoError(t, testHarness.Process(NewServiceScheduler, eval))
+	must.Len(t, 1, testHarness.Plans)
+
+	// Iterate and track the node allocations to ensure we have the correct
+	// amount, and that there are now no duplicate names. Before the duplicate
+	// allocation name fix, this section of testing would fail.
+	totalNodeAllocations := 0
+	allocIndexNames := map[string]int{canaryAlloc.Name: 1}
+
+	for _, planNodeAlloc := range testHarness.Plans[0].NodeAllocation {
+		for _, nodeAlloc := range planNodeAlloc {
+			totalNodeAllocations++
+			allocIndexNames[nodeAlloc.Name]++
+
+			if val, ok := allocIndexNames[nodeAlloc.Name]; ok && val > 1 {
+				t.Fatalf("found duplicate alloc name %q", nodeAlloc.Name)
+			}
+		}
+	}
+	must.Eq(t, 3, totalNodeAllocations)
+
+	// Ensure the correct number of destructive node updates.
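The destructive-update count below, like the duplicate check just above it, walks the plan's per-node maps. The same tally could be factored into a helper along these lines (a hypothetical helper, not part of the scheduler test harness):

```go
package sketch

import "github.com/hashicorp/nomad/nomad/structs"

// duplicateAllocNames tallies allocation names across a plan's NodeAllocation
// map and returns any name that appears more than once, mirroring the
// assertion loops used in the tests above.
func duplicateAllocNames(plan *structs.Plan) []string {
	seen := make(map[string]int)
	var dups []string
	for _, nodeAllocs := range plan.NodeAllocation {
		for _, alloc := range nodeAllocs {
			seen[alloc.Name]++
			if seen[alloc.Name] == 2 {
				dups = append(dups, alloc.Name)
			}
		}
	}
	return dups
}
```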
+ totalNodeUpdates := 0 + + for _, planNodeUpdate := range testHarness.Plans[0].NodeUpdate { + totalNodeUpdates += len(planNodeUpdate) + } + must.Eq(t, 3, totalNodeUpdates) + + testHarness.AssertEvalStatus(t, structs.EvalStatusComplete) +} + +func TestServiceSched_JobModify_ExistingDuplicateAllocIndexNonDestructive(t *testing.T) { + ci.Parallel(t) + + testHarness := NewHarness(t) + + // Create some nodes + var nodes []*structs.Node + for i := 0; i < 10; i++ { + node := mock.Node() + nodes = append(nodes, node) + must.NoError(t, testHarness.State.UpsertNode(structs.MsgTypeTestSetup, testHarness.NextIndex(), node)) + } + + // Generate a fake job with allocations + mockJob := mock.MinJob() + mockJob.TaskGroups[0].Count = 10 + must.NoError(t, testHarness.State.UpsertJob(structs.MsgTypeTestSetup, testHarness.NextIndex(), nil, mockJob)) + + // Generate some allocations which will represent our pre-existing + // allocations. These have aggressive duplicate names. + var ( + allocs []*structs.Allocation + allocIDs []string + ) + for i := 0; i < 10; i++ { + alloc := mock.MinAlloc() + alloc.Namespace = structs.DefaultNamespace + alloc.Job = mockJob + alloc.JobID = mockJob.ID + alloc.NodeID = nodes[i].ID + + alloc.Name = fmt.Sprintf("my-job.web[%d]", i) + + if i%2 == 0 { + alloc.Name = "my-job.web[0]" + } + allocs = append(allocs, alloc) + allocIDs = append(allocIDs, alloc.ID) + } + must.NoError(t, testHarness.State.UpsertAllocs(structs.MsgTypeTestSetup, testHarness.NextIndex(), allocs)) + + // Generate a job modification which will be an in-place update. + mockJob2 := mockJob.Copy() + mockJob2.ID = mockJob.ID + mockJob2.Update.MaxParallel = 2 + must.NoError(t, testHarness.State.UpsertJob(structs.MsgTypeTestSetup, testHarness.NextIndex(), nil, mockJob2)) + + // Create a mock evaluation which represents work to reconcile the job + // update. + eval := &structs.Evaluation{ + Namespace: structs.DefaultNamespace, + ID: uuid.Generate(), + Priority: 50, + TriggeredBy: structs.EvalTriggerJobRegister, + JobID: mockJob2.ID, + Status: structs.EvalStatusPending, + } + must.NoError(t, testHarness.State.UpsertEvals(structs.MsgTypeTestSetup, testHarness.NextIndex(), []*structs.Evaluation{eval})) + + // Process the evaluation and ensure we get a single plan as a result. + must.NoError(t, testHarness.Process(NewServiceScheduler, eval)) + must.Len(t, 1, testHarness.Plans) + + // Ensure the plan did not want to perform any destructive updates. + var nodeUpdateCount int + + for _, nodeUpdateAllocs := range testHarness.Plans[0].NodeUpdate { + nodeUpdateCount += len(nodeUpdateAllocs) + } + must.Zero(t, nodeUpdateCount) + + // Ensure the plan updated the existing allocs by checking the count, the + // job object, and the allocation IDs. 
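The counting that follows distinguishes the two halves of a plan: `NodeUpdate` holds allocations being stopped (destructive updates), while `NodeAllocation` holds placements and in-place updates. A small hypothetical summary helper in the spirit of those assertions:

```go
package sketch

import "github.com/hashicorp/nomad/nomad/structs"

// planSummary echoes the assertions around it: a purely in-place update
// should produce zero stops in NodeUpdate, while every entry in
// NodeAllocation keeps its original allocation ID and carries the updated
// job. Hypothetical helper, not part of the test harness.
func planSummary(plan *structs.Plan) (stops, placements int) {
	for _, allocs := range plan.NodeUpdate {
		stops += len(allocs)
	}
	for _, allocs := range plan.NodeAllocation {
		placements += len(allocs)
	}
	return stops, placements
}
```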
+	var (
+		nodeAllocationCount int
+		nodeAllocationIDs   []string
+	)
+
+	for _, nodeAllocs := range testHarness.Plans[0].NodeAllocation {
+		nodeAllocationCount += len(nodeAllocs)
+
+		for _, nodeAlloc := range nodeAllocs {
+			must.Eq(t, mockJob2, nodeAlloc.Job)
+			nodeAllocationIDs = append(nodeAllocationIDs, nodeAlloc.ID)
+		}
+	}
+	must.Eq(t, 10, nodeAllocationCount)
+	must.SliceContainsAll(t, allocIDs, nodeAllocationIDs)
+}
+
 func TestServiceSched_JobModify_Datacenters(t *testing.T) {
 	ci.Parallel(t)
 
diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go
index ed61112f84ee..9870548e761c 100644
--- a/scheduler/reconcile.go
+++ b/scheduler/reconcile.go
@@ -141,6 +141,13 @@ type reconcileResults struct {
 	// desiredFollowupEvals is the map of follow up evaluations to create per task group
 	// This is used to create a delayed evaluation for rescheduling failed allocations.
 	desiredFollowupEvals map[string][]*structs.Evaluation
+
+	// taskGroupAllocNameIndexes tracks the allocation name index, keyed by
+	// the task group name. This is stored within the results, so the generic
+	// scheduler can use it to perform duplicate alloc index checks before
+	// submitting the plan. It is always non-nil and is handled within a
+	// single routine, so does not require a mutex.
+	taskGroupAllocNameIndexes map[string]*allocNameIndex
 }
 
 // delayedRescheduleInfo contains the allocation id and a time when its eligible to be rescheduled.
@@ -193,11 +200,12 @@ func NewAllocReconciler(logger log.Logger, allocUpdateFn allocUpdateType, batch
 		supportsDisconnectedClients: supportsDisconnectedClients,
 		now:                         time.Now(),
 		result: &reconcileResults{
-			attributeUpdates:     make(map[string]*structs.Allocation),
-			disconnectUpdates:    make(map[string]*structs.Allocation),
-			reconnectUpdates:     make(map[string]*structs.Allocation),
-			desiredTGUpdates:     make(map[string]*structs.DesiredUpdates),
-			desiredFollowupEvals: make(map[string][]*structs.Evaluation),
+			attributeUpdates:          make(map[string]*structs.Allocation),
+			disconnectUpdates:         make(map[string]*structs.Allocation),
+			reconnectUpdates:          make(map[string]*structs.Allocation),
+			desiredTGUpdates:          make(map[string]*structs.DesiredUpdates),
+			desiredFollowupEvals:      make(map[string][]*structs.Evaluation),
+			taskGroupAllocNameIndexes: make(map[string]*allocNameIndex),
 		},
 	}
 }
@@ -494,6 +502,7 @@ func (a *allocReconciler) computeGroup(groupName string, all allocSet) bool {
 	// which is the union of untainted, rescheduled, allocs on migrating
 	// nodes, and allocs on down nodes (includes canaries)
 	nameIndex := newAllocNameIndex(a.jobID, groupName, tg.Count, untainted.union(migrate, rescheduleNow, lost))
+	a.result.taskGroupAllocNameIndexes[groupName] = nameIndex
 
 	// Stop any unneeded allocations and update the untainted set to not
 	// include stopped allocations.
@@ -988,6 +997,13 @@ func (a *allocReconciler) computeStop(group *structs.TaskGroup, nameIndex *alloc
 	knownUntainted := untainted.filterOutByClientStatus(structs.AllocClientStatusUnknown)
 
 	// Hot path the nothing to do case
+	//
+	// Note that this path can result in duplicate allocation indexes in a
+	// scenario where a destructive job change (e.g. an image update) happens
+	// alongside an increased group count. Once the canary is replaced, and we
+	// compute the next set of stops, the untainted set equals the new group
+	// count, which results in missing one removal.
The duplicate alloc index is + // corrected in `computePlacements` remove := len(knownUntainted) + len(migrate) - group.Count if remove <= 0 { return stop diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile_util.go index 3f9c309214fa..7cf7c3fb14c1 100644 --- a/scheduler/reconcile_util.go +++ b/scheduler/reconcile_util.go @@ -614,24 +614,35 @@ type allocNameIndex struct { job, taskGroup string count int b structs.Bitmap + + // duplicates is used to store duplicate allocation indexes which are + // currently present within the index tracking. The map key is the index, + // and the current count of duplicates. The map is only accessed within a + // single routine and multiple times per job scheduler invocation, + // therefore no lock is used. + duplicates map[uint]int } // newAllocNameIndex returns an allocNameIndex for use in selecting names of // allocations to create or stop. It takes the job and task group name, desired // count and any existing allocations as input. func newAllocNameIndex(job, taskGroup string, count int, in allocSet) *allocNameIndex { + + bitMap, duplicates := bitmapFrom(in, uint(count)) + return &allocNameIndex{ - count: count, - b: bitmapFrom(in, uint(count)), - job: job, - taskGroup: taskGroup, + count: count, + b: bitMap, + job: job, + taskGroup: taskGroup, + duplicates: duplicates, } } // bitmapFrom creates a bitmap from the given allocation set and a minimum size // maybe given. The size of the bitmap is as the larger of the passed minimum // and the maximum alloc index of the passed input (byte aligned). -func bitmapFrom(input allocSet, minSize uint) structs.Bitmap { +func bitmapFrom(input allocSet, minSize uint) (structs.Bitmap, map[uint]int) { var max uint for _, a := range input { if num := a.Index(); num > max { @@ -666,11 +677,25 @@ func bitmapFrom(input allocSet, minSize uint) structs.Bitmap { panic(err) } + // Initialize our duplicates mapping, allowing us to store a non-nil map + // at the cost of 48 bytes. + duplicates := make(map[uint]int) + + // Iterate through the allocSet input and hydrate the bitmap. We check that + // the bitmap does not contain the index first, so we can track duplicate + // entries. for _, a := range input { - bitmap.Set(a.Index()) + + allocIndex := a.Index() + + if bitmap.Check(allocIndex) { + duplicates[allocIndex]++ + } else { + bitmap.Set(allocIndex) + } } - return bitmap + return bitmap, duplicates } // Highest removes and returns the highest n used names. The returned set @@ -689,9 +714,25 @@ func (a *allocNameIndex) Highest(n uint) map[string]struct{} { return h } +// IsDuplicate checks whether the passed allocation index is duplicated within +// the tracking. +func (a *allocNameIndex) IsDuplicate(idx uint) bool { + val, ok := a.duplicates[idx] + return ok && val > 0 +} + // UnsetIndex unsets the index as having its name used func (a *allocNameIndex) UnsetIndex(idx uint) { - a.b.Unset(idx) + + // If this index is a duplicate, remove the duplicate entry. Otherwise, we + // can remove it from the bitmap tracking. + if num, ok := a.duplicates[idx]; ok { + if num--; num == 0 { + delete(a.duplicates, idx) + } + } else { + a.b.Unset(idx) + } } // NextCanaries returns the next n names for use as canaries and sets them as @@ -702,9 +743,11 @@ func (a *allocNameIndex) NextCanaries(n uint, existing, destructive allocSet) [] // Create a name index existingNames := existing.nameSet() - // First select indexes from the allocations that are undergoing destructive - // updates. 
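Stepping back from the diff for a moment: the `allocNameIndex` changes above amount to a small piece of extra bookkeeping. A simplified, hypothetical version using a plain set instead of the bitmap, to show the intent of the `bitmapFrom`, `IsDuplicate`, and `UnsetIndex` changes (this is a sketch, not the scheduler's implementation):

```go
package sketch

// dupIndex is a simplified, hypothetical take on allocNameIndex's new
// bookkeeping: a set of used indexes plus a count of extra copies seen for
// each index.
type dupIndex struct {
	used       map[uint]bool
	duplicates map[uint]int
}

// newDupIndex mirrors the intent of the bitmapFrom change above: the first
// occurrence of an index marks it used, every further occurrence is recorded
// as a duplicate.
func newDupIndex(indexes []uint) *dupIndex {
	d := &dupIndex{used: map[uint]bool{}, duplicates: map[uint]int{}}
	for _, idx := range indexes {
		if d.used[idx] {
			d.duplicates[idx]++
		} else {
			d.used[idx] = true
		}
	}
	return d
}

// IsDuplicate reports whether any duplicate copies of idx are still tracked.
func (d *dupIndex) IsDuplicate(idx uint) bool { return d.duplicates[idx] > 0 }

// UnsetIndex consumes one duplicate entry for idx if any remain; only when
// none are left does it release the underlying index, which is the intent of
// the patched UnsetIndex.
func (d *dupIndex) UnsetIndex(idx uint) {
	if n, ok := d.duplicates[idx]; ok {
		if n--; n == 0 {
			delete(d.duplicates, idx)
		} else {
			d.duplicates[idx] = n
		}
		return
	}
	delete(d.used, idx)
}
```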
This way we avoid duplicate names as they will get replaced. - dmap := bitmapFrom(destructive, uint(a.count)) + // First select indexes from the allocations that are undergoing + // destructive updates. This way we avoid duplicate names as they will get + // replaced. As this process already takes into account duplicate checking, + // we can discard the duplicate mapping when generating the bitmap. + dmap, _ := bitmapFrom(destructive, uint(a.count)) remainder := n for _, idx := range dmap.IndexesInRange(true, uint(0), uint(a.count)-1) { name := structs.AllocName(a.job, a.taskGroup, uint(idx)) diff --git a/scheduler/reconcile_util_test.go b/scheduler/reconcile_util_test.go index d8a1d9f219df..600b9b4918a0 100644 --- a/scheduler/reconcile_util_test.go +++ b/scheduler/reconcile_util_test.go @@ -948,11 +948,13 @@ func TestBitmapFrom(t *testing.T) { Name: "foo.bar[8]", }, } - b := bitmapFrom(input, 1) + b, dups := bitmapFrom(input, 1) must.Eq(t, 16, b.Size()) + must.MapEmpty(t, dups) - b = bitmapFrom(input, 8) + b, dups = bitmapFrom(input, 8) must.Eq(t, 16, b.Size()) + must.MapEmpty(t, dups) } func Test_allocNameIndex_Highest(t *testing.T) { @@ -1168,6 +1170,51 @@ func Test_allocNameIndex_Next(t *testing.T) { } } +func Test_allocNameIndex_Duplicates(t *testing.T) { + ci.Parallel(t) + + inputAllocSet := map[string]*structs.Allocation{ + "6b255fa3-c2cb-94de-5ddd-41aac25a6851": { + Name: "example.cache[0]", + JobID: "example", + TaskGroup: "cache", + }, + "e24771e6-8900-5d2d-ec93-e7076284774a": { + Name: "example.cache[1]", + JobID: "example", + TaskGroup: "cache", + }, + "d7842822-32c4-1a1c-bac8-66c3f20dfb0f": { + Name: "example.cache[2]", + JobID: "example", + TaskGroup: "cache", + }, + "76a6a487-016b-2fc2-8295-d811473ca93d": { + Name: "example.cache[0]", + JobID: "example", + TaskGroup: "cache", + }, + } + + // Build the tracker, and check some key information. + allocNameIndexTracker := newAllocNameIndex("example", "cache", 4, inputAllocSet) + must.Eq(t, 8, allocNameIndexTracker.b.Size()) + must.MapLen(t, 1, allocNameIndexTracker.duplicates) + must.True(t, allocNameIndexTracker.IsDuplicate(0)) + + // Unsetting the index should remove the duplicate entry, but not the entry + // from the underlying bitmap. + allocNameIndexTracker.UnsetIndex(0) + must.MapLen(t, 0, allocNameIndexTracker.duplicates) + must.True(t, allocNameIndexTracker.b.Check(0)) + + // If we now select a new index, having previously checked for a duplicate, + // we should get a non-duplicate. + nextAllocNames := allocNameIndexTracker.Next(1) + must.Len(t, 1, nextAllocNames) + must.Eq(t, "example.cache[3]", nextAllocNames[0]) +} + func TestAllocSet_filterByRescheduleable(t *testing.T) { ci.Parallel(t)