In [71]:
import base64
import json
import logging
import os

import boto3
import yaml
from botocore.exceptions import ClientError, NoCredentialsError

# IDs of the EC2 instances launched by the deployment cell; the teardown cell iterates over this list.
instance_id_list = []


In [17]:
def load_yaml_file(file_path):
    """
    Load and parse a YAML file.

    Args:
        file_path (str): The path to the YAML file to be read.

    Returns:
        The parsed content of the YAML file (a dictionary for a mapping-style
        document), or None if the content could not be parsed as YAML.

    Raises:
        OSError: If the file cannot be opened (for example, it does not exist).
    """
    # Decode explicitly as UTF-8 so parsing does not depend on the platform's
    # default text encoding.
    with open(file_path, "r", encoding="utf-8") as file:
        try:
            return yaml.safe_load(file)
        except yaml.YAMLError as error:
            # Parse failures are reported and signalled with None rather than raised.
            print(f"Error reading the YAML file: {error}")
            return None


In [90]:
# Read the run configuration once; the cells below index into config_data directly.
# load_yaml_file returns None when the YAML cannot be parsed, in which case those lookups will fail.
yaml_file_path = "config.yml"
config_data = load_yaml_file(yaml_file_path)


In [91]:
# Dump the parsed configuration for inspection.
# NOTE(review): the output includes the IAM instance profile ARN (with the AWS account ID);
# consider clearing this cell's output before sharing the notebook.
print(config_data)


{'aws': {'region': 'us-east-1', 'iam_instance_profile_arn': 'arn:aws:iam::471112568442:instance-profile/EC2'}, 'run_steps': {'security_group_creation': True, 'key_pair_generation': False, 'deploy_ec2_instance': True, 'delete_ec2_instance': True}, 'security_group': {'group_name': 'ec2_multi_deploy', 'description': 'MultiDeploy EC2 Security Group', 'vpc_id': None}, 'key_pair_gen': {'key_pair_name': 'ec2_multi_deploy_kp', 'key_pair_fpath': 'ec2_multi_deploy_kp.pem'}, 'instances': [{'instance_type': 'g5.2xlarge', 'ami_id': 'ami-05c3e698bd0cffe7e', 'startup_script': 'startup_scripts/gpu_ubuntu_startup.txt', 'command_to_run': 'sudo -u ubuntu bash -c fmbench --config-file src/fmbench/configs/llama3/8b/config-ec2-llama3-8b.yml --local-mode yes --write-bucket placeholder --tmp-dir /tmp > fmbench.log 2>&1'}]}


In [20]:
def get_security_group_id_by_name(group_name, vpc_id, region="us-east-1"):
    """
    Retrieve the security group ID based on its name and, when given, its VPC ID.

    Args:
        group_name (str): The name of the security group.
        vpc_id (str or None): The ID of the VPC where the security group is
            located. When None or empty, the lookup is by name only.
        region (str): The AWS region.

    Returns:
        str: The ID of the first matching security group, or None when no
        group matches or the API call fails with a ClientError.
    """
    ec2_client = boto3.client("ec2", region_name=region)

    filters = [{"Name": "group-name", "Values": [group_name]}]
    # Group names are only unique within a VPC, so narrow the search whenever
    # the caller knows which VPC it wants.
    if vpc_id:
        filters.append({"Name": "vpc-id", "Values": [vpc_id]})

    try:
        response = ec2_client.describe_security_groups(Filters=filters)
    except ClientError as e:
        print(f"Error retrieving security group: {e}")
        return None

    matching_groups = response["SecurityGroups"]
    if matching_groups:
        return matching_groups[0]["GroupId"]

    print(f"Security group '{group_name}' not found in VPC '{vpc_id}'.")
    return None


In [21]:
def create_security_group(group_name, description, vpc_id=None, region="us-east-1"):
    """
    Create an EC2 security group, or return the existing one with the same name.

    Args:
        group_name (str): Name of the security group.
        description (str): Description of the security group.
        vpc_id (str, optional): ID of the VPC in which to create the security
            group. When None or empty, no VpcId is sent with the request.
        region (str): AWS region where the security group will be created.

    Returns:
        str: ID of the created security group. If a group with this name
        already exists, the result of looking it up by name is returned
        instead. None is returned for any other ClientError.
    """
    # Initialize the EC2 client
    ec2_client = boto3.client("ec2", region_name=region)

    # Define parameters for creating the security group
    params = {
        "GroupName": group_name,
        "Description": description,
    }

    # Only send VpcId when a real value was supplied; an empty string (e.g. a
    # blank config entry) is treated the same as None.
    if vpc_id:
        params["VpcId"] = vpc_id

    try:
        # Create the security group
        response = ec2_client.create_security_group(**params)
    except ClientError as e:
        # Check if the error is due to the group already existing
        if e.response["Error"]["Code"] == "InvalidGroup.Duplicate":
            print(
                f"Security Group '{group_name}' already exists. Fetching existing security group ID."
            )
            return get_security_group_id_by_name(group_name, vpc_id, region)

        print(f"Error creating security group: {e}")
        return None

    security_group_id = response["GroupId"]
    print(f"Security Group Created: {security_group_id}")
    return security_group_id


In [22]:
def authorize_inbound_rules(security_group_id, region="us-east-1"):
    """
    Open the standard inbound ports on a security group.

    Adds TCP ingress rules for SSH (22) and HTTP (80), both from 0.0.0.0/0.
    A duplicate-rule error from the API is reported and otherwise ignored;
    any other ClientError is printed. Nothing is returned.

    Args:
        security_group_id (str): ID of the security group.
        region (str): AWS region where the security group is located.
    """
    client = boto3.client("ec2", region_name=region)

    # One single-port rule per entry: SSH and HTTP, each open to any IPv4 address.
    open_tcp_ports = (22, 80)
    ingress_rules = [
        {
            "IpProtocol": "tcp",
            "FromPort": port,
            "ToPort": port,
            "IpRanges": [{"CidrIp": "0.0.0.0/0"}],
        }
        for port in open_tcp_ports
    ]

    try:
        client.authorize_security_group_ingress(
            GroupId=security_group_id,
            IpPermissions=ingress_rules,
        )
    except ClientError as err:
        if err.response["Error"]["Code"] != "InvalidPermission.Duplicate":
            print(f"Error authorizing inbound rules: {err}")
        else:
            print(
                f"Inbound rule already exists for Security Group {security_group_id}. Skipping..."
            )
    else:
        print(f"Inbound rules added to Security Group {security_group_id}")

In [23]:
def create_key_pair(key_name, region="us-east-1"):
    """
    Create a new key pair for EC2 instances and save the private key locally.

    The private key is written to "<key_name>.pem" in the current working
    directory and left readable only by the owner (mode 0o400).

    Args:
        key_name (str): The name of the key pair.
        region (str): AWS region where the key pair will be created.

    Returns:
        str: The private key material in PEM format, or None if the API call
        fails with a ClientError.

    Raises:
        OSError: If the .pem file cannot be created or written.
    """
    # Initialize the EC2 client
    ec2_client = boto3.client("ec2", region_name=region)
    key_path = f"{key_name}.pem"

    try:
        # Create a key pair
        response = ec2_client.create_key_pair(KeyName=key_name)
    except ClientError as e:
        print(f"Error creating key pair: {e}")
        return None

    # Extract the private key from the response
    private_key = response["KeyMaterial"]

    # Create the file with owner-only permissions from the start, so the key
    # is never on disk with the wider default permissions a plain open() gives.
    key_fd = os.open(key_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(key_fd, "w") as key_file:
        key_file.write(private_key)

    # Drop write access as well: readable only by the owner.
    os.chmod(key_path, 0o400)

    print(f"Key pair '{key_name}' created and saved as '{key_name}.pem'")
    return private_key


In [94]:
def create_ec2_instance(
    key_name,
    security_group_id,
    user_data_script,
    ami,
    instance_type,
    iam_arn,
    region="us-east-1",
):
    """
    Create an EC2 instance with a startup script (user data) in the specified region.

    A single instance is launched, tagged Name=FMbench-EC2, with a gp3 volume
    (VolumeSize 250, Iops 16000, deleted on termination) mapped at /dev/sda1.

    Args:
        key_name (str): The name of the key pair to associate with the instance.
        security_group_id (str): The ID of the security group to associate with the instance.
        user_data_script (str): The script to run on startup.
        ami (str): The ID of the AMI to launch.
        instance_type (str): The EC2 instance type (e.g. "g5.2xlarge").
        iam_arn (str): ARN of the IAM instance profile to attach to the instance.
        region (str): The AWS region to launch the instance in.

    Returns:
        str: The ID of the created instance, or None if the request fails with
        a ClientError.
    """
    # Honour the caller's region instead of always talking to us-east-1.
    ec2_resource = boto3.resource("ec2", region_name=region)

    try:
        # Create a new EC2 instance with user data
        instances = ec2_resource.create_instances(
            BlockDeviceMappings=[
                {
                    # NOTE(review): assumes the AMI's root device is /dev/sda1 — confirm per AMI.
                    "DeviceName": "/dev/sda1",
                    "Ebs": {
                        "DeleteOnTermination": True,
                        "Iops": 16000,
                        "VolumeSize": 250,
                        "VolumeType": "gp3",
                    },
                },
            ],
            ImageId=ami,
            InstanceType=instance_type,  # Instance type
            KeyName=key_name,  # Name of the key pair
            SecurityGroupIds=[security_group_id],  # Security group ID
            UserData=user_data_script,  # The user data script to run on startup
            MinCount=1,  # Minimum number of instances to launch
            MaxCount=1,  # Maximum number of instances to launch
            IamInstanceProfile={  # IAM role to associate with the instance
                "Arn": iam_arn
            },
            TagSpecifications=[
                {
                    "ResourceType": "instance",
                    "Tags": [{"Key": "Name", "Value": "FMbench-EC2"}],
                }
            ],
        )
    except ClientError as e:
        print(f"Error creating EC2 instance: {e}")
        return None

    # Exactly one instance is requested (MinCount = MaxCount = 1).
    instance_id = instances[0].id
    print(f"EC2 Instance '{instance_id}' created successfully with user data.")
    return instance_id


In [82]:
def delete_ec2_instance(instance_id, region="us-east-1"):
    """
    Terminate a single EC2 instance.

    Args:
        instance_id (str): The ID of the instance to delete.
        region (str): The AWS region where the instance is located.

    Returns:
        bool: True when the terminate call returned without a ClientError,
        False when it raised one.
    """
    client = boto3.client("ec2", region_name=region)

    try:
        client.terminate_instances(InstanceIds=[instance_id])
    except ClientError as err:
        print(f"Error deleting instance {instance_id}: {err}")
        return False

    print(f"Instance {instance_id} has been terminated.")
    return True


In [26]:
# Resolve the security group that the deployment cell attaches to new instances.
# NOTE(review): config_data["aws"]["region"] is not forwarded here; the helpers run with their default region.
security_group_config = config_data.get("security_group") or {}
GROUP_NAME = security_group_config.get("group_name")
DESCRIPTION = security_group_config.get("description", " ")
# A missing, null or empty vpc_id all mean "no explicit VPC".
VPC_ID = security_group_config.get("vpc_id") or None

# Always bind sg_id so later cells do not depend on leftover kernel state.
sg_id = None
if config_data["run_steps"]["security_group_creation"]:
    # create_security_group reports API errors itself and returns None on failure.
    sg_id = create_security_group(GROUP_NAME, DESCRIPTION, VPC_ID)

    if sg_id:
        # Add inbound rules if the security group was created (or already existed)
        authorize_inbound_rules(sg_id)
elif GROUP_NAME:
    # Creation is switched off: reuse the group that already exists under this name.
    sg_id = get_security_group_id_by_name(GROUP_NAME, VPC_ID)

if sg_id is None:
    print("No security group ID could be resolved.")


Security Group 'ec2_multi_deploy' already exists. Fetching existing security group ID.
Inbound rule already exists for Security Group sg-013c79f5ffe82fb52. Skipping...


In [27]:
# Obtain the key pair name (always) and the private key material (generated or read from disk).
key_pair_config = config_data["key_pair_gen"]
KEY_PAIR_NAME = key_pair_config["key_pair_name"]

# Bound up front so the name exists even when generation or reading fails.
private_key = None

if config_data["run_steps"]["key_pair_generation"]:
    private_key = create_key_pair(KEY_PAIR_NAME)
else:
    # Any falsy flag (False, null, 0) means: reuse the key file named in the config.
    PRIVATE_KEY_FNAME = key_pair_config["key_pair_fpath"]
    try:
        with open(PRIVATE_KEY_FNAME, "r") as file:
            private_key = file.read()
    except FileNotFoundError:
        print(f"File not found: {PRIVATE_KEY_FNAME}")
    except IOError as e:
        print(f"Error reading file {PRIVATE_KEY_FNAME}: {e}")


In [28]:
# Print each instance entry from the configuration, one per line, for a quick visual check.
for i in config_data["instances"]:
    print(i)


{'instance_type': 'g5.2xlarge', 'ami_id': 'ami-07f302d2a74e2b584', 'startup_script': 'startup_scripts/az_linux2_startup.txt'}


In [101]:
# Launch one EC2 instance per entry under "instances" and remember the IDs for teardown.
# Relies on KEY_PAIR_NAME and sg_id having been set by the earlier cells.
# NOTE(review): config_data["aws"]["region"] is not forwarded; create_ec2_instance runs with its default region.
if config_data["run_steps"]["deploy_ec2_instance"]:
    iam_arn = config_data["aws"]["iam_instance_profile_arn"]
    print(iam_arn)
    # WIP Parallelize This.
    for instance in config_data["instances"]:
        instance_type = instance["instance_type"]
        ami_id = instance["ami_id"]
        startup_script = instance["startup_script"]
        # The command is optional: an entry without it just runs the startup script.
        command_to_run = instance.get("command_to_run", "")

        with open(startup_script, "r") as file:
            user_data_script = file.read()
        # NOTE(review): the command is appended as-is, so the startup script is
        # presumably expected to end with a newline — confirm.
        user_data_script += command_to_run

        # Create an EC2 instance with the user data script
        instance_id = create_ec2_instance(
            KEY_PAIR_NAME,
            sg_id,
            user_data_script,
            ami_id,
            instance_type,
            iam_arn,
        )
        # create_ec2_instance returns None on failure; only track real instance IDs.
        if instance_id is not None:
            instance_id_list.append(instance_id)


arn:aws:iam::471112568442:instance-profile/EC2
EC2 Instance 'i-0573e990f255b8b52' created successfully with user data.


In [84]:
# Display the instance IDs currently tracked for teardown.
instance_id_list


[]

In [100]:
# Terminate every tracked instance when the teardown step is enabled.
if config_data["run_steps"]["delete_ec2_instance"]:
    # delete_ec2_instance returns False when the terminate call fails; keep those
    # IDs in the list so still-running instances are not forgotten and the cell
    # can be re-run. Placeholder None entries are dropped.
    instance_id_list = [
        instance_id
        for instance_id in instance_id_list
        if instance_id is not None and not delete_ec2_instance(instance_id)
    ]


Instance i-063d6d24ba763b9a2 has been terminated.
