From d1c723242fa0d9e3428c72eb0c53f62107a7d1b3 Mon Sep 17 00:00:00 2001 From: Hanwen Date: Mon, 16 Dec 2024 12:15:40 -0800 Subject: [PATCH] Support attaching multiple network interfaces to the same network card Prior to this commit, the code made two assumptions: 1. Single network card instances can have only one network interface a. therefore, when there was more than one network interface, the code made an IMDS call to retrieve the network card index, which is only available on instances with multiple network cards. Having a secondary network interface on a single network card instance made that call fail and caused instance launch failures. 2. Each network card can have only one network interface a. therefore, the route table is unique to each network card. Having multiple network interfaces on a network card confused the code and generated wrong route tables. To fix (1), this commit uses a fallback value of 0 when retrieval of the network card index fails. To fix (2), this commit names the route tables in a way that is unique to each combination of network interface and network card. FYI: 1. Network card is the physical card. Network interface is the virtual concept. Each network card can have multiple network interfaces (which in AWS is 1 or 2) 2. 
"Network interface" is a synonym for "device" Signed-off-by: Hanwen --- .../network_interfaces/configure_nw_interface.sh | 10 ++++++---- .../network_interfaces/configure_nw_interface.sh | 9 ++++++--- .../configure_nw_interface.sh | 10 ++++++---- .../rocky/network_interfaces/configure_nw_interface.sh | 10 ++++++---- .../network_interfaces/configure_nw_interface.sh | 6 ++++-- .../recipes/config/network_interfaces.rb | 10 +++++++++- 6 files changed, 37 insertions(+), 18 deletions(-) diff --git a/cookbooks/aws-parallelcluster-environment/files/amazon-2023/network_interfaces/configure_nw_interface.sh b/cookbooks/aws-parallelcluster-environment/files/amazon-2023/network_interfaces/configure_nw_interface.sh index a9cfdf1039..bf42e62069 100644 --- a/cookbooks/aws-parallelcluster-environment/files/amazon-2023/network_interfaces/configure_nw_interface.sh +++ b/cookbooks/aws-parallelcluster-environment/files/amazon-2023/network_interfaces/configure_nw_interface.sh @@ -4,7 +4,8 @@ set -ex if [ -z "${DEVICE_NAME}" ] || # name of the device - [ -z "${DEVICE_NUMBER}" ] || # number of the device + [ -z "${DEVICE_NUMBER}" ] || # index of the device + [ -z "${NETWORK_CARD_INDEX}" ] || # index of the network card [ -z "${DEVICE_IP_ADDRESS}" ] || # ip of the device [ -z "${MAC}" ] || # mac address of the device [ -z "${CIDR_BLOCK}" ] # CIDR block of the subnet @@ -12,7 +13,7 @@ then echo 'One or more environment variables missing' exit 1 fi -echo "Configuring NIC, Device name: ${DEVICE_NAME}, Device number: ${DEVICE_NUMBER}" +echo "Configuring NIC, Device name: ${DEVICE_NAME}, Device number: ${DEVICE_NUMBER}, Network card index:${NETWORK_CARD_INDEX}" configuration_directory="/etc/systemd/network" file_name="70-${DEVICE_NAME}.network" @@ -23,12 +24,13 @@ fi cd "$configuration_directory" -ROUTE_TABLE=100${DEVICE_NUMBER} +SUFFIX=$(printf "%03d" $NETWORK_CARD_INDEX)$(printf "%03d" $DEVICE_NUMBER) +ROUTE_TABLE=1${SUFFIX} ln -s /usr/lib/systemd/network/80-ec2.network ${file_name} # Use default 
EC2 configuration. This include MTU, etc. /bin/cat < ${sub_directory}/eni.conf -# Configuration for ${DEVICE_NUMBER} generated by ParallelCluster +# Configuration for network card: ${NETWORK_CARD_INDEX}, device number: ${DEVICE_NUMBER} generated by ParallelCluster # This is inspired by https://github.com/amazonlinux/amazon-ec2-net-utils/blob/v2.4.1/lib/lib.sh [Match] MACAddress=${MAC} diff --git a/cookbooks/aws-parallelcluster-environment/files/default/network_interfaces/configure_nw_interface.sh b/cookbooks/aws-parallelcluster-environment/files/default/network_interfaces/configure_nw_interface.sh index 36fcb4c5fc..c2bd124e3d 100644 --- a/cookbooks/aws-parallelcluster-environment/files/default/network_interfaces/configure_nw_interface.sh +++ b/cookbooks/aws-parallelcluster-environment/files/default/network_interfaces/configure_nw_interface.sh @@ -10,7 +10,8 @@ set -e if [ -z "${DEVICE_NAME}" ] || # name of the device - [ -z "${DEVICE_NUMBER}" ] || # number of the device + [ -z "${DEVICE_NUMBER}" ] || # index of the device + [ -z "${NETWORK_CARD_INDEX}" ] || # index of the network card [ -z "${GW_IP_ADDRESS}" ] || # gateway ip address [ -z "${DEVICE_IP_ADDRESS}" ] || # ip address to assign to the interface [ -z "${CIDR_PREFIX_LENGTH}" ] || # the prefix length of the device IP cidr block @@ -20,9 +21,11 @@ then exit 1 fi -ROUTE_TABLE="100${DEVICE_NUMBER}" +SUFFIX=$(printf "%03d" $NETWORK_CARD_INDEX)$(printf "%03d" $DEVICE_NUMBER) -echo "Configuring ${DEVICE_NAME} with IP:${DEVICE_IP_ADDRESS} CIDR_PREFIX:${CIDR_PREFIX_LENGTH} NETMASK:${NETMASK} GW:${GW_IP_ADDRESS} ROUTING_TABLE:${ROUTE_TABLE}" +ROUTE_TABLE="1${SUFFIX}" + +echo "Configuring device name: ${DEVICE_NAME} with IP:${DEVICE_IP_ADDRESS} CIDR_PREFIX:${CIDR_PREFIX_LENGTH} NETMASK:${NETMASK} GW:${GW_IP_ADDRESS} ROUTING_TABLE:${ROUTE_TABLE}" # config file FILE="/etc/sysconfig/network-scripts/ifcfg-${DEVICE_NAME}" diff --git 
a/cookbooks/aws-parallelcluster-environment/files/redhat-8.network_interfaces/configure_nw_interface.sh b/cookbooks/aws-parallelcluster-environment/files/redhat-8.network_interfaces/configure_nw_interface.sh index d142d8c8e8..c9a1bc95e8 100644 --- a/cookbooks/aws-parallelcluster-environment/files/redhat-8.network_interfaces/configure_nw_interface.sh +++ b/cookbooks/aws-parallelcluster-environment/files/redhat-8.network_interfaces/configure_nw_interface.sh @@ -13,7 +13,8 @@ set -e if [ -z "${DEVICE_NAME}" ] || # name of the device - [ -z "${DEVICE_NUMBER}" ] || # number of the device + [ -z "${DEVICE_NUMBER}" ] || # index of the device + [ -z "${NETWORK_CARD_INDEX}" ] || # index of the network card [ -z "${GW_IP_ADDRESS}" ] || # gateway ip address [ -z "${DEVICE_IP_ADDRESS}" ] || # ip address to assign to the interface [ -z "${CIDR_PREFIX_LENGTH}" ] # the prefix length of the device IP cidr block @@ -23,9 +24,10 @@ then fi con_name="System ${DEVICE_NAME}" -route_table="100${DEVICE_NUMBER}" -priority="100${DEVICE_NUMBER}" -metric="100${DEVICE_NUMBER}" +SUFFIX=$(printf "%03d" $NETWORK_CARD_INDEX)$(printf "%03d" $DEVICE_NUMBER) +route_table="1${SUFFIX}" +priority="1${SUFFIX}" +metric="1${SUFFIX}" # Rename connection original_con_name=`nmcli -t -f GENERAL.CONNECTION device show ${DEVICE_NAME} | cut -f2 -d':'` diff --git a/cookbooks/aws-parallelcluster-environment/files/rocky/network_interfaces/configure_nw_interface.sh b/cookbooks/aws-parallelcluster-environment/files/rocky/network_interfaces/configure_nw_interface.sh index d142d8c8e8..c9a1bc95e8 100644 --- a/cookbooks/aws-parallelcluster-environment/files/rocky/network_interfaces/configure_nw_interface.sh +++ b/cookbooks/aws-parallelcluster-environment/files/rocky/network_interfaces/configure_nw_interface.sh @@ -13,7 +13,8 @@ set -e if [ -z "${DEVICE_NAME}" ] || # name of the device - [ -z "${DEVICE_NUMBER}" ] || # number of the device + [ -z "${DEVICE_NUMBER}" ] || # index of the device + [ -z "${NETWORK_CARD_INDEX}" 
] || # index of the network card [ -z "${GW_IP_ADDRESS}" ] || # gateway ip address [ -z "${DEVICE_IP_ADDRESS}" ] || # ip address to assign to the interface [ -z "${CIDR_PREFIX_LENGTH}" ] # the prefix length of the device IP cidr block @@ -23,9 +24,10 @@ then fi con_name="System ${DEVICE_NAME}" -route_table="100${DEVICE_NUMBER}" -priority="100${DEVICE_NUMBER}" -metric="100${DEVICE_NUMBER}" +SUFFIX=$(printf "%03d" $NETWORK_CARD_INDEX)$(printf "%03d" $DEVICE_NUMBER) +route_table="1${SUFFIX}" +priority="1${SUFFIX}" +metric="1${SUFFIX}" # Rename connection original_con_name=`nmcli -t -f GENERAL.CONNECTION device show ${DEVICE_NAME} | cut -f2 -d':'` diff --git a/cookbooks/aws-parallelcluster-environment/files/ubuntu/network_interfaces/configure_nw_interface.sh b/cookbooks/aws-parallelcluster-environment/files/ubuntu/network_interfaces/configure_nw_interface.sh index 2e853057d9..f2fc909c9a 100644 --- a/cookbooks/aws-parallelcluster-environment/files/ubuntu/network_interfaces/configure_nw_interface.sh +++ b/cookbooks/aws-parallelcluster-environment/files/ubuntu/network_interfaces/configure_nw_interface.sh @@ -10,7 +10,8 @@ set -e if [ -z "${DEVICE_NAME}" ] || # name of the device - [ -z "${DEVICE_NUMBER}" ] || # number of the device + [ -z "${DEVICE_NUMBER}" ] || # index of the device + [ -z "${NETWORK_CARD_INDEX}" ] || # index of the network card [ -z "${GW_IP_ADDRESS}" ] || # gateway ip address [ -z "${DEVICE_IP_ADDRESS}" ] || # ip address to assign to the interface [ -z "${CIDR_PREFIX_LENGTH}" ] || # the prefix length of the device IP cidr block @@ -40,7 +41,8 @@ if [ "${STATIC_IP_CONFIG}" = "" ] fi FILE="/etc/netplan/${DEVICE_NAME}.yaml" -ROUTE_TABLE="100${DEVICE_NUMBER}" +SUFFIX=$(printf "%03d" $NETWORK_CARD_INDEX)$(printf "%03d" $DEVICE_NUMBER) +ROUTE_TABLE="1${SUFFIX}" echo "Configuring ${DEVICE_NAME} with IP:${DEVICE_IP_ADDRESS} CIDR_PREFIX:${CIDR_PREFIX_LENGTH} NETMASK:${NETMASK} GW:${GW_IP_ADDRESS} ROUTING_TABLE:${ROUTE_TABLE}" diff --git 
a/cookbooks/aws-parallelcluster-environment/recipes/config/network_interfaces.rb b/cookbooks/aws-parallelcluster-environment/recipes/config/network_interfaces.rb index ef5898cb9d..bee82724ee 100644 --- a/cookbooks/aws-parallelcluster-environment/recipes/config/network_interfaces.rb +++ b/cookbooks/aws-parallelcluster-environment/recipes/config/network_interfaces.rb @@ -16,7 +16,13 @@ return if on_docker? def network_card_index(mac, token) + # This IMDS call is not available on single NIC instance, therefore fallback to 0 uri = URI("http://169.254.169.254/latest/meta-data/network/interfaces/macs/#{mac}/network-card") + get_metadata_with_token(token, uri) || 0 +end + +def device_number(mac, token) + uri = URI("http://169.254.169.254/latest/meta-data/network/interfaces/macs/#{mac}/device-number") get_metadata_with_token(token, uri) end @@ -69,6 +75,7 @@ def cidr_to_netmask(cidr) # Configure nw interfaces macs.each do |mac| device_name = device_name(mac) + device_number = device_number(mac, token) network_card_index = network_card_index(mac, token) gw_ip_address = gateway_address device_ip_address = device_ip(mac, token) @@ -84,7 +91,8 @@ def cidr_to_netmask(cidr) environment( # TODO: The variables are a superset of what's required by individual scripts. Consider simplification. 'DEVICE_NAME' => device_name, - 'DEVICE_NUMBER' => "#{network_card_index}", # in configure_nw_interface DEVICE_NUMBER actually means network card index + 'DEVICE_NUMBER' => "#{device_number}", + 'NETWORK_CARD_INDEX' => "#{network_card_index}", 'GW_IP_ADDRESS' => gw_ip_address, 'DEVICE_IP_ADDRESS' => device_ip_address, 'CIDR_PREFIX_LENGTH' => cidr_prefix_length,