Skip to content

Commit

Permalink
Make replace-auto-scaling-group-instances more efficient
Browse files Browse the repository at this point in the history
If the number of availability zones is two, the command doesn't
need extra instances to keep the balance.
  • Loading branch information
abicky committed May 24, 2021
1 parent 121d89f commit 356e0f8
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 63 deletions.
34 changes: 21 additions & 13 deletions internal/capacity/autoscalinggroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,18 +149,18 @@ func (asg *AutoScalingGroup) launchNewInstances(oldInstanceCount int) error {
}
requiredCount := int64(oldInstanceCount)

// The new desired capacity must be a multiple of the number of availability zones,
// otherwise AZRebalance will terminate some instances unexpectedly.
// Assume that there are following instances on each availability zone:
// ap-northeast-1a: 2, ap-northeast-1c: 1, ap-northeast-1d: 2
// After increasing the desired capacity to 10, that is, launching new instances as many as the old instances,
// the number of instances will change as below:
// ap-northeast-1a: 3 (old: 2, new: 1), ap-northeast-1c: 4 (old: 1, new: 3), ap-northeast-1d: 3 (old: 2, new: 1)
// After terminating old instances, the number of instances will change as below:
// ap-northeast-1a: 1, ap-northeast-1c: 3, ap-northeast-1d: 1
// AZRebalance will launch another instance in ap-northeast-1a or ap-northeast-1d and terminate one
// in ap-northeast-1c without draining it.
if *asg.OriginalDesiredCapacity%int64(len(asg.AvailabilityZones)) > 0 {
if len(asg.AvailabilityZones) > 2 && *asg.OriginalDesiredCapacity%int64(len(asg.AvailabilityZones)) > 0 {
// If there are more than two availability zones, the new desired capacity must be a multiple of the number of
// availability zones, otherwise AZRebalance will terminate some instances unexpectedly.
// Assume that there are following instances in each availability zone:
// ap-northeast-1a: 2, ap-northeast-1c: 1, ap-northeast-1d: 2
// After increasing the desired capacity to 10, that is, launching new instances as many as the old instances,
// the number of instances will change as below:
// ap-northeast-1a: 3 (old: 2, new: 1), ap-northeast-1c: 4 (old: 1, new: 3), ap-northeast-1d: 3 (old: 2, new: 1)
// After terminating old instances, the number of instances will change as below:
// ap-northeast-1a: 1, ap-northeast-1c: 3, ap-northeast-1d: 1
// AZRebalance will launch another instance in ap-northeast-1a or ap-northeast-1d and terminate one
// in ap-northeast-1c without draining it.
requiredCount += int64(len(asg.AvailabilityZones)) - *asg.OriginalDesiredCapacity%int64(len(asg.AvailabilityZones))
}

Expand Down Expand Up @@ -360,16 +360,24 @@ func (asg *AutoScalingGroup) fetchSortedInstanceIDs(count int64) ([]string, erro

azs := make([]string, 0)
azToInstances := make(map[string][]*ec2.Instance)
azToOldInstanceCount := make(map[string]int)
for _, i := range instances {
az := *i.Placement.AvailabilityZone
if !sliceutil.Contains(azs, az) {
azs = append(azs, az)
}
azToInstances[az] = append(azToInstances[az], i)
if asg.StateSavedAt != nil && i.LaunchTime.Before(*asg.StateSavedAt) {
azToOldInstanceCount[az] += 1
}
}

sort.SliceStable(azs, func(i, j int) bool {
return len(azToInstances[azs[i]]) > len(azToInstances[azs[j]])
if len(azToInstances[azs[i]]) == len(azToInstances[azs[j]]) {
return azToOldInstanceCount[azs[i]] > azToOldInstanceCount[azs[j]]
} else {
return len(azToInstances[azs[i]]) > len(azToInstances[azs[j]])
}
})

sortedInstanceIDs := make([]string, 0, count)
Expand Down
125 changes: 75 additions & 50 deletions internal/capacity/autoscalinggroup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -359,67 +359,92 @@ func TestNewAutoScalingGroup(t *testing.T) {
}

func TestAutoScalingGroup_ReplaceInstances(t *testing.T) {
t.Run("the desired capacity is a multiple of the number of availability zones", func(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()

desiredCapacity := int64(6)
maxSize := int64(8)
tests := []struct {
name string
desiredCapacity int64
maxSize int64
oldInstances []*autoscaling.Instance
newInstances []*autoscaling.Instance
}{
{
name: "the desired capacity is a multiple of the number of availability zones",
desiredCapacity: 6,
maxSize: 8,
oldInstances: append(
createInstances("ap-northeast-1a", 3),
createInstances("ap-northeast-1c", 3)...,
),
newInstances: append(
createInstances("ap-northeast-1a", 3),
createInstances("ap-northeast-1c", 3)...,
),
},
{
name: "the desired capacity is not a multiple of the number of availability zones but the number of availability zone is only two",
desiredCapacity: 7,
maxSize: 8,
oldInstances: append(
createInstances("ap-northeast-1a", 3),
createInstances("ap-northeast-1c", 4)...,
),
newInstances: append(
createInstances("ap-northeast-1a", 4),
createInstances("ap-northeast-1c", 3)...,
),
},
}

asMock := mocks.NewMockAutoScalingAPI(ctrl)
ec2Mock := mocks.NewMockEC2API(ctrl)
drainerMock := mocks.NewMockDrainer(ctrl)
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()

now := time.Now().UTC()
stateSavedAt := now.Format(time.RFC3339)
asMock := mocks.NewMockAutoScalingAPI(ctrl)
ec2Mock := mocks.NewMockEC2API(ctrl)
drainerMock := mocks.NewMockDrainer(ctrl)

oldInstances := append(
createInstances("ap-northeast-1a", int(desiredCapacity/2)),
createInstances("ap-northeast-1c", int(desiredCapacity/2))...,
)
oldReservations := createReservations(oldInstances, now.Add(-24*time.Hour))
now := time.Now().UTC()
stateSavedAt := now.Format(time.RFC3339)

newInstances := append(
createInstances("ap-northeast-1a", int(desiredCapacity/2)),
createInstances("ap-northeast-1c", int(desiredCapacity/2))...,
)
newReservations := createReservations(newInstances, now)
oldReservations := createReservations(tt.oldInstances, now.Add(-24*time.Hour))
newReservations := createReservations(tt.newInstances, now)

gomock.InOrder(
asMock.EXPECT().DescribeAutoScalingGroups(gomock.Any()).Return(&autoscaling.DescribeAutoScalingGroupsOutput{
AutoScalingGroups: []*autoscaling.Group{
{
AutoScalingGroupName: aws.String("autoscaling-group-name"),
AvailabilityZones: []*string{
aws.String("ap-northeast-1a"),
aws.String("ap-northeast-1c"),
gomock.InOrder(
asMock.EXPECT().DescribeAutoScalingGroups(gomock.Any()).Return(&autoscaling.DescribeAutoScalingGroupsOutput{
AutoScalingGroups: []*autoscaling.Group{
{
AutoScalingGroupName: aws.String("autoscaling-group-name"),
AvailabilityZones: []*string{
aws.String("ap-northeast-1a"),
aws.String("ap-northeast-1c"),
},
DesiredCapacity: aws.Int64(tt.desiredCapacity),
Instances: tt.oldInstances,
MaxSize: aws.Int64(tt.maxSize),
},
DesiredCapacity: aws.Int64(desiredCapacity),
Instances: oldInstances,
MaxSize: aws.Int64(maxSize),
},
},
}, nil),
}, nil),

ec2Mock.EXPECT().DescribeInstances(gomock.Any()).Return(&ec2.DescribeInstancesOutput{
Reservations: oldReservations,
}, nil),
ec2Mock.EXPECT().DescribeInstances(gomock.Any()).Return(&ec2.DescribeInstancesOutput{
Reservations: oldReservations,
}, nil),

expectLaunchNewInstances(t, asMock, oldInstances, newInstances, desiredCapacity, maxSize, stateSavedAt),
expectTerminateInstances(t, asMock, ec2Mock, drainerMock, oldInstances, newInstances, oldReservations, newReservations, desiredCapacity, maxSize),
expectRestoreState(t, asMock, desiredCapacity, maxSize, stateSavedAt),
)
expectLaunchNewInstances(t, asMock, tt.oldInstances, tt.newInstances, tt.desiredCapacity, tt.maxSize, stateSavedAt),
expectTerminateInstances(t, asMock, ec2Mock, drainerMock, tt.oldInstances, tt.newInstances, oldReservations, newReservations, tt.desiredCapacity, tt.maxSize),
expectRestoreState(t, asMock, tt.desiredCapacity, tt.maxSize, stateSavedAt),
)

group, err := capacity.NewAutoScalingGroup("autoscaling-group-name", asMock, ec2Mock)
if err != nil {
t.Fatal(err)
}
group, err := capacity.NewAutoScalingGroup("autoscaling-group-name", asMock, ec2Mock)
if err != nil {
t.Fatal(err)
}

err = group.ReplaceInstances(drainerMock)
if err != nil {
t.Errorf("err = %#v; want nil", err)
}
})
err = group.ReplaceInstances(drainerMock)
if err != nil {
t.Errorf("err = %#v; want nil", err)
}
})
}

t.Run("the desired capacity is not a multiple of the number of availability zones", func(t *testing.T) {
ctrl := gomock.NewController(t)
Expand Down

0 comments on commit 356e0f8

Please sign in to comment.