Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/dokken-system-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ jobs:
- ubuntu2204
- rhel8
- rocky8
- rhel9
- rocky9
fail-fast: false
steps:
- uses: actions/checkout@main
Expand Down
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ This file is used to list changes made in each version of the AWS ParallelCluste
------

**ENHANCEMENTS**
- Add support for RHEL9.
- Add support for Rocky Linux 9 as `CustomAmi` created through the `build-image` process. No official public ParallelCluster Rocky Linux 9 AMI is available at this time.
- Add the configuration parameter `DeploymentSettings/DefaultUserHome` to allow users to move the default user's home directory to `/local/home` instead of `/home` (default).
- Add the possibility to choose between Open and Closed Source Nvidia Drivers when building an AMI, through the `['cluster']['nvidia']['kernel_open']` cookbook node attribute.

Expand Down
Original file line number Diff line number Diff line change
@@ -1,31 +1,11 @@
# FIXME: Fix Code Duplication
# pylint: disable=R0801

import configparser
import json
import os
import re
import sys
import syslog
import time

import boto3
import requests
from botocore.config import Config

METADATA_REQUEST_TIMEOUT = 60


def get_imdsv2_token():
    """Build the headers to send with instance metadata requests.

    Asks the metadata service for an IMDSv2 session token with a 300-second
    TTL. If the token request succeeds, the token is returned under the
    ``X-aws-ec2-metadata-token`` header; otherwise an empty dict is returned
    so that the metadata requests fall back to IMDSv1.

    Returns:
        dict: headers for subsequent metadata requests (possibly empty).
    """
    response = requests.put(
        "http://169.254.169.254/latest/api/token",
        headers={"X-aws-ec2-metadata-token-ttl-seconds": "300"},
        timeout=METADATA_REQUEST_TIMEOUT,
    )
    if response.status_code != requests.codes.ok:
        # No token could be obtained: no extra header means IMDSv1 requests.
        return {}
    return {"X-aws-ec2-metadata-token": response.content}


def validate_device_name(device_name):
Expand Down Expand Up @@ -66,47 +46,6 @@ def adapt_device_name(dev):
return dev


def parse_proxy_config():
    """Return a botocore ``Config`` reflecting the proxy set in /etc/boto.cfg.

    The ``proxy`` and ``proxy_port`` options of the ``Boto`` section are used
    only when both are present; in that case they are combined into an
    ``https`` proxy entry. Otherwise a default ``Config`` is returned.

    Returns:
        Config: the botocore configuration to pass to boto3 clients.
    """
    boto_cfg = configparser.RawConfigParser()
    boto_cfg.read("/etc/boto.cfg")

    proxy_defined = boto_cfg.has_option("Boto", "proxy") and boto_cfg.has_option("Boto", "proxy_port")
    if not proxy_defined:
        return Config()

    host = boto_cfg.get("Boto", "proxy")
    port = boto_cfg.get("Boto", "proxy_port")
    return Config(proxies={"https": f"{host}:{port}"})


def get_device_volume_id(ec2, dev, instance_id):
    """Return the EBS volume id mapped to device ``dev`` on the given instance.

    The instance's ``blockDeviceMapping`` attribute is polled every 5 seconds
    until ``dev`` shows up. After 36 unsuccessful retries (180 seconds) the
    failure is logged to syslog and the process exits with status 1.

    Args:
        ec2: client exposing ``describe_instance_attribute`` (a boto3 EC2 client).
        dev: device name to look for, as reported in ``DeviceName``.
        instance_id: id of the instance whose block device mapping is queried.

    Returns:
        The ``VolumeId`` found under the ``Ebs`` entry of the matching mapping.

    Raises:
        SystemExit: if the device does not appear within the polling window.
    """
    max_retries = 36
    delay_seconds = 5

    def _block_device_map():
        # One describe call, indexed by device name. A missing
        # "BlockDeviceMappings" key is treated as "no devices yet" so that the
        # polling loop keeps going instead of failing on iterating None.
        mappings = (
            ec2.describe_instance_attribute(InstanceId=instance_id, Attribute="blockDeviceMapping").get(
                "BlockDeviceMappings"
            )
            or []
        )
        return {mapping.get("DeviceName"): mapping for mapping in mappings}

    dev_map = _block_device_map()
    retries = 0
    while dev not in dev_map:
        if retries == max_retries:
            syslog.syslog(f"Dev {dev} did not appears in 180 seconds.")
            sys.exit(1)
        syslog.syslog(f"Looking for dev {dev} in dev_map {dev_map}")
        time.sleep(delay_seconds)
        dev_map = _block_device_map()
        retries += 1

    return dev_map.get(dev).get("Ebs").get("VolumeId")


def get_metadata_value(token, metadata_path):
    """Fetch one instance metadata entry and return its body as text.

    Args:
        token: headers dict to send with the request (empty for IMDSv1, or
            carrying the IMDSv2 session token).
        metadata_path: full URL of the metadata entry to read.

    Returns:
        str: the response body.
    """
    response = requests.get(metadata_path, headers=token, timeout=METADATA_REQUEST_TIMEOUT)
    return response.text


def main():
syslog.syslog("Starting ec2_dev_2_volid.py script")
try:
Expand All @@ -115,29 +54,14 @@ def main():
syslog.syslog(f"Input block device is {dev}")
except IndexError:
syslog.syslog(syslog.LOG_ERR, "Provide block device i.e. xvdf")

dev = adapt_device_name(dev)

token = get_imdsv2_token()

instance_id = get_metadata_value(token, "http://169.254.169.254/latest/meta-data/instance-id")

region = get_metadata_value(token, "http://169.254.169.254/latest/meta-data/placement/availability-zone")
region = region[:-1]

proxy_config = parse_proxy_config()

# Configure the AWS CA bundle.
# In US isolated regions the dedicated CA bundle will be used.
# In any other region, the default bundle will be used (None stands for the default settings).
# Note: We want to apply a more general solution that applies to every region,
# but for the time being this is enough to support US isolated regions without
# impacting the other ones.
ca_bundle = f"/etc/pki/{region}/certs/ca-bundle.pem" if region.startswith("us-iso") else None

ec2 = boto3.client("ec2", region_name=region, config=proxy_config, verify=ca_bundle)

volume_id = get_device_volume_id(ec2, dev, instance_id)
mapping_file_path = "/dev/disk/by-ebs-volumeid/parallelcluster_dev_id_mapping"
if os.path.isfile(mapping_file_path):
with open(mapping_file_path, "r", encoding="utf-8") as mapping_file:
mapping = json.load(mapping_file)
else:
mapping = {}
volume_id = mapping.get(dev)
print(volume_id)


Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import argparse
import configparser
import json
import os
import re
import subprocess # nosec B404
Expand Down Expand Up @@ -133,6 +134,17 @@ def attach_volume(volume_id, instance_id, ec2):
dev = available_devices[0]
response = ec2.attach_volume(VolumeId=volume_id, InstanceId=instance_id, Device=dev)

mapping_file_path = "/dev/disk/by-ebs-volumeid/parallelcluster_dev_id_mapping"
if os.path.isfile(mapping_file_path):
with open(mapping_file_path, "r", encoding="utf-8") as mapping_file:
mapping = json.load(mapping_file)
else:
mapping = {}
mapping[dev] = volume_id
os.makedirs(os.path.dirname(mapping_file_path), exist_ok=True)
with open(mapping_file_path, "w", encoding="utf-8") as mapping_file:
json.dump(mapping, mapping_file)

# Poll for volume to attach
state = response.get("State")
delay = 5 # seconds
Expand Down

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@
command "mkdir -p $(dirname #{ip6tables_rules_file}) && ip6tables-save > #{ip6tables_rules_file}"
end

template '/etc/init.d/parallelcluster-iptables' do
source 'imds/parallelcluster-iptables.erb'
template '/usr/local/sbin/restore_tables.sh' do
source 'imds/restore_tables.sh.erb'
user 'root'
group 'root'
mode '0744'
Expand All @@ -71,6 +71,25 @@
)
end

# Render /usr/local/sbin/save_tables.sh from the imds/save_tables.sh.erb
# template, owned by root:root with mode 0744. The iptables and ip6tables
# rules file paths are handed to the template as variables.
template '/usr/local/sbin/save_tables.sh' do
source 'imds/save_tables.sh.erb'
user 'root'
group 'root'
mode '0744'
variables(
iptables_rules_file: iptables_rules_file,
ip6tables_rules_file: ip6tables_rules_file
)
end

# Render the parallelcluster-iptables.service unit file under
# /etc/systemd/system from the imds/parallelcluster-iptables.service.erb
# template shipped in the aws-parallelcluster-environment cookbook
# (root:root, mode 0644).
template '/etc/systemd/system/parallelcluster-iptables.service' do
source 'imds/parallelcluster-iptables.service.erb'
cookbook 'aws-parallelcluster-environment'
owner 'root'
group 'root'
mode '0644'
end

# Enable the parallelcluster-iptables service and start it.
# NOTE(review): presumably this is the systemd unit rendered just above — confirm.
service "parallelcluster-iptables" do
action %i(enable start)
end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and limitations under the License.

provides :cloudwatch, platform: 'redhat' do |node|
node['platform_version'].to_i == 8
node['platform_version'].to_i >= 8
end

use 'partial/_cloudwatch_common'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and limitations under the License.

provides :cloudwatch, platform: 'rocky' do |node|
node['platform_version'].to_i == 8
node['platform_version'].to_i >= 8
end

use 'partial/_cloudwatch_common'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied.
# See the License for the specific language governing permissions and limitations under the License.
provides :ec2_udev_rules, platform: 'redhat' do |node|
node['platform_version'].to_i == 8
node['platform_version'].to_i >= 8
end

unified_mode true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied.
# See the License for the specific language governing permissions and limitations under the License.
provides :ec2_udev_rules, platform: 'rocky' do |node|
node['platform_version'].to_i == 8
node['platform_version'].to_i >= 8
end

unified_mode true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,10 @@

unified_mode true
use 'partial/_common_udev_configuration'
use 'partial/_debian_udev_configuration'

default_action :setup

action :setup do
action_create_common_udev_files
action_set_udev_autoreload
action_start_ec2blk
end
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,6 @@
mode '0744'
end

cookbook_file 'ec2blkdev-init' do
source 'ec2_udev_rules/ec2blkdev-init'
cookbook 'aws-parallelcluster-environment'
path '/etc/init.d/ec2blkdev'
user 'root'
group 'root'
mode '0744'
end

cookbook_file 'manageVolume.py' do
source 'ec2_udev_rules/manageVolume.py'
cookbook 'aws-parallelcluster-environment'
Expand All @@ -67,8 +58,7 @@
end

action :start_ec2blk do
service "ec2blkdev" do
supports restart: true
action %i(enable start)
execute "Refresh UdevAdmin" do
command "udevadm trigger --action=change --subsystem-match=block"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please add a brief comment explaining why this is required?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not 100% sure that this is required. This was done in the old code, so I kept it here to stay on the safe side.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's validate in a follow-up activity whether it is required or not. It is better not to have unnecessary steps.

end unless on_docker?
end
Loading