Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add price-capacity-optimized AllocationStrategy #6192

Merged
merged 9 commits into from
Jun 18, 2024
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ CHANGELOG
**ENHANCEMENTS**
- Add new configuration section `Scheduling/SlurmSettings/ExternalSlurmdbd` to connect the cluster to an external Slurmdbd.
- Add support for Amazon Linux 2023.
- Add support for `price-capacity-optimized` as an `AllocationStrategy`.

**BUG FIXES**
- Fix DRA configuration to make `AutoExportPolicy` and `AutoImportPolicy` optional.
Expand Down
1 change: 1 addition & 0 deletions cli/src/pcluster/config/cluster_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2535,6 +2535,7 @@ class AllocationStrategy(Enum):

LOWEST_PRICE = "lowest-price"
CAPACITY_OPTIMIZED = "capacity-optimized"
PRICE_CAPACITY_OPTIMIZED = "price-capacity-optimized"


class SlurmQueue(_CommonQueue):
Expand Down
17 changes: 17 additions & 0 deletions cli/tests/pcluster/validators/test_instances_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -593,11 +593,19 @@ def test_instances_networking_validator(
"Compute Resource TestComputeResource is using an OnDemand CapacityType but the Allocation Strategy "
"specified is capacity-optimized. OnDemand CapacityType can only use 'lowest-price' allocation strategy.",
),
(
CapacityType.ONDEMAND,
AllocationStrategy.PRICE_CAPACITY_OPTIMIZED,
"Compute Resource TestComputeResource is using an OnDemand CapacityType but the Allocation Strategy "
"specified is price-capacity-optimized. "
"OnDemand CapacityType can only use 'lowest-price' allocation strategy.",
),
(CapacityType.ONDEMAND, AllocationStrategy.LOWEST_PRICE, ""),
(CapacityType.ONDEMAND, None, ""),
# Spot supports "lowest-price", "capacity-optimized" and "price-capacity-optimized" allocation strategies
dreambeyondorange marked this conversation as resolved.
Show resolved Hide resolved
(CapacityType.SPOT, AllocationStrategy.LOWEST_PRICE, ""),
(CapacityType.SPOT, AllocationStrategy.CAPACITY_OPTIMIZED, ""),
(CapacityType.SPOT, AllocationStrategy.PRICE_CAPACITY_OPTIMIZED, ""),
(CapacityType.SPOT, None, ""),
# Capacity Block type does not support any allocation strategy
(
Expand All @@ -609,6 +617,15 @@ def test_instances_networking_validator(
"allocation strategy should not be set."
),
),
(
CapacityType.CAPACITY_BLOCK,
AllocationStrategy.PRICE_CAPACITY_OPTIMIZED,
(
"Compute Resource TestComputeResource is using a CAPACITY_BLOCK CapacityType but the Allocation "
"Strategy specified is price-capacity-optimized. When using CAPACITY_BLOCK CapacityType, "
"allocation strategy should not be set."
),
),
(
CapacityType.CAPACITY_BLOCK,
AllocationStrategy.LOWEST_PRICE,
Expand Down
6 changes: 6 additions & 0 deletions tests/integration-tests/configs/develop.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,12 @@ test-suites:
instances: {{ common.INSTANCES_DEFAULT_X86 }}
oss: ["centos7"]
schedulers: ["slurm"]
test_spot.py::test_spot_price_capacity_optimized:
dimensions:
- regions: ["me-south-1"]
instances: {{ common.INSTANCES_DEFAULT_X86 }}
oss: ["centos7"]
schedulers: ["slurm"]
storage:
test_fsx_lustre.py::test_fsx_lustre:
dimensions:
Expand Down
8 changes: 7 additions & 1 deletion tests/integration-tests/configs/isolated_regions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -369,14 +369,20 @@ test-suites:
instances: {{ INSTANCES }}
oss: {{ OSS }}
schedulers: {{ SCHEDULERS }}
# This test cannot be executed in US isolated regions
# These tests cannot be executed in US isolated regions
# because Spot instances are not supported in these regions.
# spot:
# test_spot.py::test_spot_default:
# dimensions:
# - regions: {{ REGIONS }}
# instances: {{ INSTANCES }}
# oss: {{ OSS }}
# schedulers: {{ SCHEDULERS }}
# test_spot.py::test_spot_price_capacity_optimized:
# dimensions:
# - regions: {{ REGIONS }}
# instances: {{ INSTANCES }}
# oss: {{ OSS }}
# schedulers: {{ SCHEDULERS }}
storage:
test_efs.py::test_efs_compute_az:
Expand Down
11 changes: 11 additions & 0 deletions tests/integration-tests/tests/spot/test_spot.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,14 @@ def test_spot_default(scheduler_commands_factory, pcluster_config_reader, cluste
remote_command_executor = RemoteCommandExecutor(cluster)
scheduler_commands = scheduler_commands_factory(remote_command_executor)
assert_that(scheduler_commands.compute_nodes_count()).is_equal_to(min_count)


@pytest.mark.usefixtures("region", "os", "instance", "scheduler")
def test_spot_price_capacity_optimized(scheduler_commands_factory, pcluster_config_reader, clusters_factory):
    """
    Check that a Spot cluster using the price-capacity-optimized allocation strategy can be created.

    The cluster config is rendered with ``min_count=1``, a cluster is created from it, and the test
    passes when the scheduler reports exactly that many compute nodes.
    """
    expected_nodes = 1

    # Render the config and create the cluster in one step; the factory returns the cluster handle.
    spot_cluster = clusters_factory(pcluster_config_reader(min_count=expected_nodes))

    # Scheduler commands are issued through a remote executor bound to the new cluster.
    commands = scheduler_commands_factory(RemoteCommandExecutor(spot_cluster))

    assert_that(commands.compute_nodes_count()).is_equal_to(expected_nodes)
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
Image:
Os: {{ os }}
HeadNode:
InstanceType: {{ instance }}
Networking:
SubnetId: {{ public_subnet_id }}
Ssh:
KeyName: {{ key_name }}
Imds:
Secured: {{ imds_secured }}
Scheduling:
Scheduler: {{ scheduler }}
{% if scheduler == "awsbatch" %}AwsBatchQueues:{% else %}SlurmQueues:{% endif %}
- Name: compute
CapacityType: SPOT
AllocationStrategy: price-capacity-optimized
ComputeResources:
- Name: compute-i1
Instances:
- InstanceType: {{ instance }}
MinCount: {{ min_count }}
Networking:
SubnetIds:
- {{ private_subnet_id }}
Loading