From ac3e2253793d9ae36dfc9fb9dffd2bc56ce7145b Mon Sep 17 00:00:00 2001 From: Hanwen Date: Mon, 30 Dec 2024 08:55:13 -0800 Subject: [PATCH] Configure proper priority for route tables https://github.com/aws/aws-parallelcluster-cookbook/pull/2855 made the pcluster route table/metric number larger (meaning lower priority). Thereafter, some unwanted default rules on AL2023 ec2-net-utils took priority and caused the test_multiple_nics integration test to fail on AL2023. Then, https://github.com/aws/aws-parallelcluster-cookbook/pull/2857 made the number too small, interfering with the route table configurations from IMDS on AL2. Therefore, this commit tries to imitate the priority prior to these two PRs. This is not the cleanest fix, because it is staying in the lucky priority range instead of fully resolving the issue (i.e. preventing IMDS and ec2-net-utils from configuring the route tables). However, this commit is the least breaking change. So I propose to go with this commit. Metric number range before the two PRs: Network card (0,0): 1000 Network card (0,1): 1000 (which was causing conflicts and the reason for all these PRs) Network card (n,1): 100n (for p5, which has 32 network cards, it will be 1000-10031) Metric number range after the first PR: Network card (0,0): 1000000 Network card (0,1): 1000001 (conflict fixed :) ) Network card (n,1): 1000001+n*1000 (for p5, it will be 1000000-1031001) Metric number range after the second PR: Network card (0,0): 10 Network card (0,1): 75 Network card (n,1): 0x(hexadecimal number)n01+10 (for p5, it will be 10-12555. 
The hexadecimal number was accidentally introduced because bash automatically interprets numbers starting with "00" as hexadecimal numbers) Metric number range after this commit: Network card (0,0): 1000 Network card (0,1): 1001 Network card (n,1): n01+1000 (for p5, it will be 1000-4101) Signed-off-by: Hanwen --- .../amazon-2023/network_interfaces/configure_nw_interface.sh | 4 ++-- .../default/network_interfaces/configure_nw_interface.sh | 4 ++-- .../redhat-8.network_interfaces/configure_nw_interface.sh | 4 ++-- .../files/rocky/network_interfaces/configure_nw_interface.sh | 4 ++-- .../files/ubuntu/network_interfaces/configure_nw_interface.sh | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/cookbooks/aws-parallelcluster-environment/files/amazon-2023/network_interfaces/configure_nw_interface.sh b/cookbooks/aws-parallelcluster-environment/files/amazon-2023/network_interfaces/configure_nw_interface.sh index 52b66cc361..584861ef81 100644 --- a/cookbooks/aws-parallelcluster-environment/files/amazon-2023/network_interfaces/configure_nw_interface.sh +++ b/cookbooks/aws-parallelcluster-environment/files/amazon-2023/network_interfaces/configure_nw_interface.sh @@ -24,8 +24,8 @@ fi cd "$configuration_directory" -SUFFIX=$(printf "%03d" $NETWORK_CARD_INDEX)$(printf "%02d" $DEVICE_NUMBER) -ROUTE_TABLE="$(( $SUFFIX + 10 ))" +SUFFIX=$NETWORK_CARD_INDEX$(printf "%02d" $DEVICE_NUMBER) +ROUTE_TABLE="$(( $SUFFIX + 1000 ))" ln -s /usr/lib/systemd/network/80-ec2.network ${file_name} # Use default EC2 configuration. This include MTU, etc. 
diff --git a/cookbooks/aws-parallelcluster-environment/files/default/network_interfaces/configure_nw_interface.sh b/cookbooks/aws-parallelcluster-environment/files/default/network_interfaces/configure_nw_interface.sh index cfe0f5ed24..b7e3628e86 100644 --- a/cookbooks/aws-parallelcluster-environment/files/default/network_interfaces/configure_nw_interface.sh +++ b/cookbooks/aws-parallelcluster-environment/files/default/network_interfaces/configure_nw_interface.sh @@ -21,9 +21,9 @@ then exit 1 fi -SUFFIX=$(printf "%03d" $NETWORK_CARD_INDEX)$(printf "%02d" $DEVICE_NUMBER) +SUFFIX=$NETWORK_CARD_INDEX$(printf "%02d" $DEVICE_NUMBER) -ROUTE_TABLE="$(( $SUFFIX + 10 ))" +ROUTE_TABLE="$(( $SUFFIX + 1000 ))" echo "Configuring device name: ${DEVICE_NAME} with IP:${DEVICE_IP_ADDRESS} CIDR_PREFIX:${CIDR_PREFIX_LENGTH} NETMASK:${NETMASK} GW:${GW_IP_ADDRESS} ROUTING_TABLE:${ROUTE_TABLE}" diff --git a/cookbooks/aws-parallelcluster-environment/files/redhat-8.network_interfaces/configure_nw_interface.sh b/cookbooks/aws-parallelcluster-environment/files/redhat-8.network_interfaces/configure_nw_interface.sh index c743393199..7c12bbe95c 100644 --- a/cookbooks/aws-parallelcluster-environment/files/redhat-8.network_interfaces/configure_nw_interface.sh +++ b/cookbooks/aws-parallelcluster-environment/files/redhat-8.network_interfaces/configure_nw_interface.sh @@ -24,8 +24,8 @@ then fi con_name="System ${DEVICE_NAME}" -SUFFIX=$(printf "%03d" $NETWORK_CARD_INDEX)$(printf "%02d" $DEVICE_NUMBER) -route_table="$(( $SUFFIX + 10 ))" +SUFFIX=$NETWORK_CARD_INDEX$(printf "%02d" $DEVICE_NUMBER) +route_table="$(( $SUFFIX + 1000 ))" priority="${route_table}" metric="${route_table}" diff --git a/cookbooks/aws-parallelcluster-environment/files/rocky/network_interfaces/configure_nw_interface.sh b/cookbooks/aws-parallelcluster-environment/files/rocky/network_interfaces/configure_nw_interface.sh index c743393199..7c12bbe95c 100644 --- 
a/cookbooks/aws-parallelcluster-environment/files/rocky/network_interfaces/configure_nw_interface.sh +++ b/cookbooks/aws-parallelcluster-environment/files/rocky/network_interfaces/configure_nw_interface.sh @@ -24,8 +24,8 @@ then fi con_name="System ${DEVICE_NAME}" -SUFFIX=$(printf "%03d" $NETWORK_CARD_INDEX)$(printf "%02d" $DEVICE_NUMBER) -route_table="$(( $SUFFIX + 10 ))" +SUFFIX=$NETWORK_CARD_INDEX$(printf "%02d" $DEVICE_NUMBER) +route_table="$(( $SUFFIX + 1000 ))" priority="${route_table}" metric="${route_table}" diff --git a/cookbooks/aws-parallelcluster-environment/files/ubuntu/network_interfaces/configure_nw_interface.sh b/cookbooks/aws-parallelcluster-environment/files/ubuntu/network_interfaces/configure_nw_interface.sh index f67450939e..192025f1a4 100644 --- a/cookbooks/aws-parallelcluster-environment/files/ubuntu/network_interfaces/configure_nw_interface.sh +++ b/cookbooks/aws-parallelcluster-environment/files/ubuntu/network_interfaces/configure_nw_interface.sh @@ -41,8 +41,8 @@ if [ "${STATIC_IP_CONFIG}" = "" ] fi FILE="/etc/netplan/${DEVICE_NAME}.yaml" -SUFFIX=$(printf "%03d" $NETWORK_CARD_INDEX)$(printf "%02d" $DEVICE_NUMBER) -ROUTE_TABLE="$(( $SUFFIX + 10 ))" +SUFFIX=$NETWORK_CARD_INDEX$(printf "%02d" $DEVICE_NUMBER) +ROUTE_TABLE="$(( $SUFFIX + 1000 ))" echo "Configuring ${DEVICE_NAME} with IP:${DEVICE_IP_ADDRESS} CIDR_PREFIX:${CIDR_PREFIX_LENGTH} NETMASK:${NETMASK} GW:${GW_IP_ADDRESS} ROUTING_TABLE:${ROUTE_TABLE}"