aws · hgreebe · Aug 8, 2025 · Aug 4, 2025 · Aug 7, 2025 · Aug 7, 2025
@@ -13,6 +13,7 @@
     DEFAULT_EPHEMERAL_DIR,
     NODE_BOOTSTRAP_TIMEOUT,
     OS_MAPPING,
+    P6E_GB200,
     PCLUSTER_COMPUTE_RESOURCE_NAME_TAG,
     PCLUSTER_QUEUE_NAME_TAG,
     PCLUSTER_S3_ARTIFACTS_DICT,
@@ -150,33 +151,7 @@ def _add_compute_resource_launch_template(
         instance_profiles,
         is_detailed_monitoring_enabled,
     ):
-        # LT network interfaces
-        compute_lt_nw_interfaces = [
-            ec2.CfnLaunchTemplate.NetworkInterfaceProperty(
-                device_index=0,
-                network_card_index=0,
-                associate_public_ip_address=queue.networking.assign_public_ip,
-                interface_type="efa" if compute_resource.efa and compute_resource.efa.enabled else None,
-                groups=queue_lt_security_groups,
-                subnet_id=(
-                    queue.networking.subnet_ids[0] if isinstance(compute_resource, SlurmComputeResource) else None
-                ),
-            )
-        ]
-
-        for network_card in compute_resource.network_cards_list[1:]:
-            compute_lt_nw_interfaces.append(
-                ec2.CfnLaunchTemplate.NetworkInterfaceProperty(
-                    device_index=0 if network_card.maximum_network_interfaces() == 1 else 1,
-                    network_card_index=network_card.network_card_index(),
-                    associate_public_ip_address=False,
-                    interface_type="efa" if compute_resource.efa and compute_resource.efa.enabled else None,
-                    groups=queue_lt_security_groups,
-                    subnet_id=(
-                        queue.networking.subnet_ids[0] if isinstance(compute_resource, SlurmComputeResource) else None
-                    ),
-                )
-            )
+        compute_lt_nw_interfaces = add_network_interfaces(queue, compute_resource, queue_lt_security_groups)
 
         conditional_template_properties = {}
         if compute_resource.is_ebs_optimized:
@@ -385,3 +360,51 @@ def _add_compute_resource_launch_template(
         )
 
         return launch_template
+
+
+def add_network_interfaces(
+    queue,
+    compute_resource,
+    queue_lt_security_groups,
+):
+    """Generate launch template network interfaces list."""
+    is_gb200 = compute_resource.instance_types[0].split(".")[0] == P6E_GB200
+    efa_enabled = compute_resource.efa and compute_resource.efa.enabled
+    interface_type = "efa" if efa_enabled and not is_gb200 else None
+
+    compute_lt_nw_interfaces = [
+        ec2.CfnLaunchTemplate.NetworkInterfaceProperty(
+            device_index=0,
+            network_card_index=0,
+            associate_public_ip_address=queue.networking.assign_public_ip,
+            interface_type=interface_type,
+            groups=queue_lt_security_groups,
+            subnet_id=(queue.networking.subnet_ids[0] if isinstance(compute_resource, SlurmComputeResource) else None),
+        )
+    ]
+
+    for network_card in compute_resource.network_cards_list[1:]:
+        even = network_card.network_card_index() % 2 == 0
+        # if efa is disabled, and we have a gb200 instance we skip configuring odd numbered indexes because they only
+        # support efa-only interface type
+        if is_gb200 and not efa_enabled and not even:
+            continue
+
+        interface_type = "efa" if efa_enabled else None
+        # if efa is enabled with a gb200 instance, even indexes are configured as efa and the odd as efa-only
+        if is_gb200 and efa_enabled:
+            interface_type = "efa" if even else "efa-only"
+
+        compute_lt_nw_interfaces.append(
+            ec2.CfnLaunchTemplate.NetworkInterfaceProperty(
+                device_index=0 if network_card.maximum_network_interfaces() == 1 else 1,
+                network_card_index=network_card.network_card_index(),
+                associate_public_ip_address=False,
+                interface_type=interface_type,
+                groups=queue_lt_security_groups,
+                subnet_id=(
+                    queue.networking.subnet_ids[0] if isinstance(compute_resource, SlurmComputeResource) else None
+                ),
+            )
+        )
+    return compute_lt_nw_interfaces
@@ -1,11 +1,13 @@
 import json
+from unittest.mock import MagicMock
 
 import pytest
 from assertpy import assert_that
 from freezegun import freeze_time
 
 from pcluster.schemas.cluster_schema import ClusterSchema
 from pcluster.templates.cdk_builder import CDKTemplateBuilder
+from pcluster.templates.queues_stack import add_network_interfaces
 from pcluster.utils import load_json_dict, load_yaml_dict
 from tests.pcluster.aws.dummy_aws_api import mock_aws_api
 from tests.pcluster.models.dummy_s3_bucket import dummy_cluster_bucket, mock_bucket_object_utils
@@ -152,6 +154,97 @@ def test_compute_nodes_dna_json(
     assert_that(json.loads(compute_node_extra_json)).is_equal_to(expected_compute_node_extra_json)
 
 
+class NetworkCard:
+    def __init__(self, index, max_interfaces=1):
+        self._index = index
+        self._max_interfaces = max_interfaces
+
+    def network_card_index(self):
+        return self._index
+
+    def maximum_network_interfaces(self):
+        return self._max_interfaces
+
+
+@pytest.mark.parametrize(
+    "efa_enabled, instance_type, network_cards_list, expected_interfaces",
+    [
+        (
+            True,
+            "p6e-gb200.WHATEVER_SIZE",
+            [NetworkCard(0), NetworkCard(1), NetworkCard(2, 2), NetworkCard(3), NetworkCard(4, 2)],
+            [
+                {"network_card_index": 0, "interface_type": None, "device_index": 0},
+                {"network_card_index": 1, "interface_type": "efa-only", "device_index": 0},
+                {"network_card_index": 2, "interface_type": "efa", "device_index": 1},
+                {"network_card_index": 3, "interface_type": "efa-only", "device_index": 0},
+                {"network_card_index": 4, "interface_type": "efa", "device_index": 1},
+            ],
+        ),
+        (
+            False,
+            "p6e-gb200.WHATEVER_SIZE",
+            [NetworkCard(0), NetworkCard(1), NetworkCard(2, 2), NetworkCard(3), NetworkCard(4, 2)],
+            [
+                {"network_card_index": 0, "interface_type": None, "device_index": 0},
+                {"network_card_index": 2, "interface_type": None, "device_index": 1},
+                {"network_card_index": 4, "interface_type": None, "device_index": 1},
+            ],
+        ),
+        (
+            True,
+            "NOTp6e-gb200.WHATEVER_SIZE",
+            [NetworkCard(0), NetworkCard(1, 2), NetworkCard(2, 2)],
+            [
+                {"network_card_index": 0, "interface_type": "efa", "device_index": 0},
+                {"network_card_index": 1, "interface_type": "efa", "device_index": 1},
+                {"network_card_index": 2, "interface_type": "efa", "device_index": 1},
+            ],
+        ),
+        (
+            True,
+            "NOTp6e-gb200.WHATEVER_SIZE",
+            [NetworkCard(0), NetworkCard(1), NetworkCard(2)],
+            [
+                {"network_card_index": 0, "interface_type": "efa", "device_index": 0},
+                {"network_card_index": 1, "interface_type": "efa", "device_index": 0},
+                {"network_card_index": 2, "interface_type": "efa", "device_index": 0},
+            ],
+        ),
+        (
+            False,
+            "NOTp6e-gb200.WHATEVER_SIZE",
+            [NetworkCard(0), NetworkCard(1, 2), NetworkCard(2, 2)],
+            [
+                {"network_card_index": 0, "interface_type": None, "device_index": 0},
+                {"network_card_index": 1, "interface_type": None, "device_index": 1},
+                {"network_card_index": 2, "interface_type": None, "device_index": 1},
+            ],
+        ),
+    ],
+)
+def test_add_compute_resource_launch_template(
+    mocker, efa_enabled, instance_type, test_datadir, network_cards_list, expected_interfaces
+):
+    mock_compute_resource = MagicMock()
+    mock_compute_resource.name = "test-compute-resource"
+    mock_compute_resource.instance_types = [instance_type]
+    mock_compute_resource.efa.enabled = efa_enabled
+    mock_compute_resource.network_cards_list = network_cards_list
+
+    mock_queue = MagicMock()
+    mock_queue.name = "test-queue"
+
+    network_interfaces = add_network_interfaces(mock_queue, mock_compute_resource, ["sg-12345"])
+
+    assert len(network_interfaces) == len(expected_interfaces)
+
+    for actual, expected in zip(network_interfaces, expected_interfaces):
+        assert actual.network_card_index == expected["network_card_index"]
+        assert actual.interface_type == expected["interface_type"]
+        assert actual.device_index == expected["device_index"]
+
+
 def render_join(elem: dict):
     sep = str(elem[0])
     body = elem[1]