Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cli/src/pcluster/aws/aws_resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -571,6 +571,10 @@ def instance_type(self):
"""Return the instance type associated to the Capacity Reservation."""
return self.capacity_reservation_data.get("InstanceType")

def instance_platform(self):
    """
    Return the instance platform associated to the Capacity Reservation.

    The value is looked up under the "InstancePlatform" key of the reservation data;
    the lookup uses ``.get``, so a missing key yields None instead of raising.
    """
    reservation_data = self.capacity_reservation_data
    return reservation_data.get("InstancePlatform")

def availability_zone(self):
    """
    Return the availability zone associated to the Capacity Reservation.

    The value is looked up under the "AvailabilityZone" key of the reservation data;
    the lookup uses ``.get``, so a missing key yields None instead of raising.
    """
    reservation_data = self.capacity_reservation_data
    return reservation_data.get("AvailabilityZone")
Expand Down
1 change: 1 addition & 0 deletions cli/src/pcluster/config/cluster_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3113,6 +3113,7 @@ def _register_validators(self, context: ValidatorContext = None): # noqa: C901
is_flexible=compute_resource.is_flexible(),
subnet=queue.networking.subnet_ids[0],
capacity_type=queue.capacity_type,
os=self.image.os,
)
self._register_validator(
CapacityReservationResourceGroupValidator,
Expand Down
19 changes: 19 additions & 0 deletions cli/src/pcluster/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,25 @@
SLURM = "slurm"
AWSBATCH = "awsbatch"


# Capacity Reservation platform strings we support.
CR_PLATFORM_LINUX_UNIX = "Linux/UNIX"
CR_PLATFORM_UBUNTU_PRO = "Ubuntu Pro"
CR_PLATFORM_RHEL = "Red Hat Enterprise Linux"


# Cluster OS name -> Capacity Reservation platform paired with that OS.
# Entries are grouped by platform so each family of OS names is listed once.
CAPACITY_RESERVATION_OS_MAP = {
    **dict.fromkeys(("alinux2", "alinux2023"), CR_PLATFORM_LINUX_UNIX),
    **dict.fromkeys(("ubuntu2004", "ubuntu2204", "ubuntu2404"), CR_PLATFORM_UBUNTU_PRO),
    **dict.fromkeys(("rhel8", "rocky8", "rhel9", "rocky9"), CR_PLATFORM_RHEL),
}

OS_MAPPING = {
"alinux2": {"user": "ec2-user"},
"alinux2023": {"user": "ec2-user"},
Expand Down
15 changes: 14 additions & 1 deletion cli/src/pcluster/validators/ec2_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,11 @@
from pcluster.aws.aws_resources import CapacityReservationInfo
from pcluster.aws.common import AWSClientError
from pcluster.config.common import CapacityType
from pcluster.constants import NVIDIA_OPENRM_UNSUPPORTED_INSTANCE_TYPES, UNSUPPORTED_OSES_FOR_MICRO_NANO
from pcluster.constants import (
CAPACITY_RESERVATION_OS_MAP,
NVIDIA_OPENRM_UNSUPPORTED_INSTANCE_TYPES,
UNSUPPORTED_OSES_FOR_MICRO_NANO,
)
from pcluster.utils import get_resource_name_from_resource_arn
from pcluster.validators.common import FailureLevel, Validator

Expand Down Expand Up @@ -331,9 +335,18 @@ def _validate(
is_flexible: bool,
subnet: str,
capacity_type: CapacityType,
os,
):
if capacity_reservation_id:
capacity_reservation = AWSApi.instance().ec2.describe_capacity_reservations([capacity_reservation_id])[0]
cr_platform = capacity_reservation.instance_platform()
if CAPACITY_RESERVATION_OS_MAP.get(os) != cr_platform:
self._add_failure(
f"Capacity reservation {capacity_reservation_id} has platform {cr_platform},"
f" which is not compatible with the cluster OS {os}. "
f"Please use a reservation with platform {CAPACITY_RESERVATION_OS_MAP.get(os)}.",
FailureLevel.ERROR,
)

if not instance_types:
# If the instance type doesn't exist, this is an invalid config,
Expand Down
3 changes: 3 additions & 0 deletions cli/tests/pcluster/validators/test_all_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,20 +398,23 @@ def test_slurm_validators_are_called_with_correct_argument(test_datadir, mocker)
call(
capacity_reservation_id="cr-34567",
instance_types=["t3.large"],
os="alinux2",
is_flexible=True,
subnet="subnet-23456789",
capacity_type=CapacityType.ONDEMAND,
),
call(
capacity_reservation_id="cr-12345",
instance_types=["t3.xlarge"],
os="alinux2",
is_flexible=True,
subnet="subnet-23456789",
capacity_type=CapacityType.CAPACITY_BLOCK,
),
call(
capacity_reservation_id="cr-23456",
instance_types=["t3.xlarge"],
os="alinux2",
is_flexible=False,
subnet="subnet-23456789",
capacity_type=CapacityType.CAPACITY_BLOCK,
Expand Down
140 changes: 122 additions & 18 deletions cli/tests/pcluster/validators/test_ec2_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -708,109 +708,166 @@ def test_placement_group_validator(
"is_flexible",
"subnet_availability_zone",
"capacity_type",
"os",
"expected_messages",
),
[
(
CapacityReservationInfo({"InstanceType": "c5.xlarge", "AvailabilityZone": "us-east-1a"}),
CapacityReservationInfo(
{"InstanceType": "c5.xlarge", "AvailabilityZone": "us-east-1a", "InstancePlatform": "Linux/UNIX"}
),
["c5.xlarge"],
False,
"us-east-1a",
None,
"alinux2",
[],
),
# Wrong instance type
(
CapacityReservationInfo({"InstanceType": "m5.xlarge", "AvailabilityZone": "us-east-1a"}),
CapacityReservationInfo(
{"InstanceType": "m5.xlarge", "AvailabilityZone": "us-east-1a", "InstancePlatform": "Linux/UNIX"}
),
["c5.xlarge"],
False,
"us-east-1a",
CapacityType.ONDEMAND,
"alinux2023",
["Capacity reservation .* must have the same instance type as c5.xlarge."],
),
# Wrong availability zone
(
CapacityReservationInfo({"InstanceType": "c5.xlarge", "AvailabilityZone": "us-east-1b"}),
CapacityReservationInfo(
{"InstanceType": "c5.xlarge", "AvailabilityZone": "us-east-1b", "InstancePlatform": "Linux/UNIX"}
),
["c5.xlarge"],
False,
"us-east-1a",
CapacityType.SPOT,
["Capacity reservation .* must use the same availability zone as subnet"],
"ubuntu2404",
[
"Capacity reservation .* has platform Linux/UNIX, which is not compatible with "
+ "the cluster OS ubuntu2404. Please use a reservation with platform Ubuntu Pro.",
"Capacity reservation .* must use the same availability zone as subnet",
],
),
# Both instance type and availability zone are wrong
(
CapacityReservationInfo({"InstanceType": "m5.xlarge", "AvailabilityZone": "us-east-1b"}),
CapacityReservationInfo(
{"InstanceType": "m5.xlarge", "AvailabilityZone": "us-east-1b", "InstancePlatform": "Ubuntu Pro"}
),
["c5.xlarge"],
False,
"us-east-1a",
CapacityType.ONDEMAND,
"ubuntu2204",
[
"Capacity reservation .* must have the same instance type as c5.xlarge.",
"Capacity reservation .* must use the same availability zone as subnet",
],
),
(
CapacityReservationInfo({"InstanceType": "m5.xlarge", "AvailabilityZone": "us-east-1b"}),
CapacityReservationInfo(
{
"InstanceType": "m5.xlarge",
"AvailabilityZone": "us-east-1b",
"InstancePlatform": "Red Hat Enterprise Linux",
}
),
["c5.xlarge"],
False,
"us-east-1a",
CapacityType.SPOT,
["Capacity reservation .* must use the same availability zone as subnet"],
"ubuntu2004",
[
"Capacity reservation .* has platform Red Hat Enterprise Linux, which is not "
+ "compatible with the cluster OS ubuntu2004. Please use a reservation with platform Ubuntu Pro.",
"Capacity reservation .* must use the same availability zone as subnet",
],
),
# empty instance type, this should not happen because instance type is automatically retrieved when using cr-id
(
CapacityReservationInfo({"InstanceType": "m5.xlarge", "AvailabilityZone": "us-east-1b"}),
CapacityReservationInfo(
{"InstanceType": "m5.xlarge", "AvailabilityZone": "us-east-1b", "InstancePlatform": "Linux/UNIX"}
),
None,
False,
"us-east-1a",
CapacityType.ONDEMAND,
"alinux2",
[
"Unexpected failure. InstanceType parameter cannot be empty when using CapacityReservationId",
"Capacity reservation .* must use the same availability zone as subnet",
],
),
# empty instance type, this should not happen because instance type is automatically retrieved when using cr-id
(
CapacityReservationInfo({"InstanceType": "m5.xlarge", "AvailabilityZone": "us-east-1b"}),
CapacityReservationInfo(
{
"InstanceType": "m5.xlarge",
"AvailabilityZone": "us-east-1b",
"InstancePlatform": "Red Hat Enterprise Linux",
}
),
"",
False,
"us-east-1a",
CapacityType.SPOT,
"rocky9",
[
"Unexpected failure. InstanceType parameter cannot be empty when using CapacityReservationId",
"Capacity reservation .* must use the same availability zone as subnet",
],
),
# wrong capacity type
(
CapacityReservationInfo({"InstanceType": "c5.xlarge", "AvailabilityZone": "us-east-1a"}),
CapacityReservationInfo(
{
"InstanceType": "c5.xlarge",
"AvailabilityZone": "us-east-1a",
"InstancePlatform": "Red Hat Enterprise Linux",
}
),
["c5.xlarge"],
False,
"us-east-1a",
CapacityType.CAPACITY_BLOCK,
"rocky8",
[
"Capacity reservation cr-123 is not a Capacity Block reservation. "
"It cannot be used when specifying CapacityType: CAPACITY_BLOCK."
],
),
(
CapacityReservationInfo(
{"InstanceType": "c5.xlarge", "AvailabilityZone": "us-east-1a", "ReservationType": "capacity-block"}
{
"InstanceType": "c5.xlarge",
"AvailabilityZone": "us-east-1a",
"ReservationType": "capacity-block",
"InstancePlatform": "Ubuntu Pro",
}
),
["c5.xlarge"],
False,
"us-east-1a",
CapacityType.ONDEMAND,
"ubuntu2004",
[], # Do not check Ondemand capacity type
),
(
CapacityReservationInfo(
{"InstanceType": "c5.xlarge", "AvailabilityZone": "us-east-1a", "ReservationType": "ondemand"}
{
"InstanceType": "c5.xlarge",
"AvailabilityZone": "us-east-1a",
"ReservationType": "ondemand",
"InstancePlatform": "Linux/UNIX",
}
),
["c5.xlarge"],
False,
"us-east-1a",
CapacityType.CAPACITY_BLOCK,
"alinux2",
[
"Capacity reservation cr-123 is not a Capacity Block reservation. "
"It cannot be used when specifying CapacityType: CAPACITY_BLOCK."
Expand All @@ -819,43 +876,88 @@ def test_placement_group_validator(
# right capacity type
(
CapacityReservationInfo(
{"InstanceType": "c5.xlarge", "AvailabilityZone": "us-east-1a", "ReservationType": "ondemand"}
{
"InstanceType": "c5.xlarge",
"AvailabilityZone": "us-east-1a",
"ReservationType": "ondemand",
"InstancePlatform": "Linux/UNIX",
}
),
["c5.xlarge"],
False,
"us-east-1a",
CapacityType.ONDEMAND,
"alinux2023",
[],
),
(
CapacityReservationInfo(
{"InstanceType": "c5.xlarge", "AvailabilityZone": "us-east-1a", "ReservationType": "capacity-block"}
{
"InstanceType": "c5.xlarge",
"AvailabilityZone": "us-east-1a",
"ReservationType": "capacity-block",
"InstancePlatform": "Linux/UNIX",
}
),
["c5.xlarge"],
False,
"us-east-1a",
CapacityType.CAPACITY_BLOCK,
"alinux2",
[],
),
(
CapacityReservationInfo(
{"InstanceType": "c5.xlarge", "AvailabilityZone": "us-east-1a", "InstancePlatform": "SUSE Linux"}
),
["c5.xlarge"],
False,
"us-east-1a",
CapacityType.ONDEMAND,
"alinux2",
[
"Capacity reservation .* has platform SUSE Linux, which is not compatible"
+ " with the cluster OS alinux2. Please use a reservation with platform Linux/UNIX."
],
),
# Flexible instance type, with a single instance and capacity_reservation_id
(
CapacityReservationInfo({"InstanceType": "c5.xlarge", "AvailabilityZone": "us-east-1a"}),
CapacityReservationInfo(
{
"InstanceType": "c5.xlarge",
"AvailabilityZone": "us-east-1a",
"InstancePlatform": "Linux/UNIX with HA",
}
),
["c5.xlarge"],
True,
"us-east-1a",
None,
["CapacityReservationId parameter cannot be used with Instances parameter."],
"alinux2023",
[
"Capacity reservation .* has platform Linux/UNIX with HA, which is not compatible "
+ "with the cluster OS alinux2023. Please use a reservation with platform Linux/UNIX.",
"CapacityReservationId parameter cannot be used with Instances parameter.",
],
),
# Flexible instance type with multiple instance types and capacity_reservation_id
(
CapacityReservationInfo(
{"InstanceType": "c5.xlarge", "AvailabilityZone": "us-east-1a", "ReservationType": "ondemand"}
{
"InstanceType": "c5.xlarge",
"AvailabilityZone": "us-east-1a",
"ReservationType": "ondemand",
"InstancePlatform": "Ubuntu Pro",
}
),
["c5.xlarge", "m5.2xlarge"],
True,
"us-east-1a",
CapacityType.ONDEMAND,
["CapacityReservationId parameter cannot be used with Instances parameter."],
"ubuntu2004",
[
"CapacityReservationId parameter cannot be used with Instances parameter.",
],
),
],
)
Expand All @@ -866,6 +968,7 @@ def test_capacity_reservation_validator(
subnet_availability_zone,
is_flexible,
capacity_type,
os,
expected_messages,
):
mock_aws_api(mocker)
Expand All @@ -877,6 +980,7 @@ def test_capacity_reservation_validator(
is_flexible=is_flexible,
subnet="subnet-123",
capacity_type=capacity_type,
os=os,
)
assert_failure_messages(actual_failures, expected_messages)

Expand Down
Loading