# Create SageMaker Resources

In [None]:
%load_ext lab_black
%load_ext autoreload
%autoreload 2

In [None]:
import base64
import os
from typing import Dict, List

import boto3
from dotenv import find_dotenv, load_dotenv

In [None]:
%aimport src.s3.buckets
import src.s3.buckets as s3b

%aimport src.iam.iam_roles
import src.iam.iam_roles as iamr

%aimport src.ec2.ec2_instances_sec_groups
import src.ec2.ec2_instances_sec_groups as ec2h

In [None]:
# Load environment variables (e.g. AWS_REGION, read below) from the .env file
# located by find_dotenv()
load_dotenv(find_dotenv())

In [None]:
# Region passed to every boto3 client/resource created in this notebook;
# None when AWS_REGION is not set in the environment
aws_region = os.environ.get("AWS_REGION")

## About

In this notebook, the following resources related to AWS SageMaker will be created
- S3 bucket
- EC2 Security group for the SageMaker instance
- CloudWatch Logging group for the SageMaker instance
- SageMaker notebook lifecycle configuration
- SageMaker notebook instance

### Pre-Requisites
1. As mentioned in `README.md`, the following environment variables should be set with the user's AWS credentials ([1](https://docs.aws.amazon.com/sdk-for-php/v3/developer-guide/guide_credentials_environment.html), [2](https://docs.aws.amazon.com/sdk-for-php/v3/developer-guide/guide_credentials_profiles.html))
   - `AWS_ACCESS_KEY_ID`
   - `AWS_SECRET_ACCESS_KEY`
   - `AWS_REGION`

   These credentials must be associated with a user group whose users have been granted programmatic access to AWS resources. To configure this for an IAM user group, see the documentation [here](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_users_create.html#id_users_create_console).

### Notes
1. All resources must exist in the same AWS region (specified by the environment variable `AWS_REGION`).

## User Inputs

In [None]:
# S3
s3_bucket_name = "sagemakerdemoedesz4321"

# IAM Role (must already exist; it is only looked up by name in this notebook)
iam_role_name = "AmazonSageMaker-ExecutionRole-20211222T235985"
# iam_role_path = "/"
# iam_role_description = "Sagemaker S3 permissions role"
# iam_role_trust_policy = ...

# EC2 Security Groups
sg_group_name = "mysgname"
sg_group_desc = "My security group"
# The tags reference sg_group_name, so they must be built after it is defined
# (otherwise a fresh kernel raises NameError on this cell)
sg_group_tags = [{"Key": "Name", "Value": sg_group_name}]

# SageMaker notebook lifecycle configuration and notebook instance
nb_lifecycle_name = "mynbconfig"
nb_instance_name = "mydemo"
nb_instance_type = "ml.t3.xlarge"
nb_instance_tags = [{"Key": "Name", "Value": nb_instance_name}]

# Cloud Watch
cw_log_group_name = "/aws/sagemaker/NotebookInstances"

Details about AWS EC2 instance types are available [here](https://aws.amazon.com/ec2/instance-types/).

## Create the S3 Bucket

In [None]:
%%time
# Call the project's S3 helper with the bucket name and region; the helper's
# return value is shown as the cell output
s3_bucket_creation_response = s3b.create_s3_bucket(s3_bucket_name, aws_region)
s3_bucket_creation_response

## IAM Role for Sagemaker to Access S3

Currently, an IAM role granting Sagemaker appropriate S3 access must be created from the AWS console.

To do this, [start](https://docs.aws.amazon.com/sagemaker/latest/dg/howitworks-create-ws.html) the process of creating a SageMaker notebook instance through the console and create the IAM role manually by following the instructions [here](https://docs.aws.amazon.com/glue/latest/dg/create-an-iam-role-sagemaker-notebook.html). After the IAM role is created, the SageMaker instance creation process can be canceled, as the instance will be created programmatically later in this notebook.

In [None]:
# Fetch the manually created IAM role by name. The role's ARN
# (role_response["Role"]["Arn"]) is needed later when the notebook instance
# is created.
iam_client = boto3.client("iam", region_name=aws_region)
role_response = iam_client.get_role(RoleName=iam_role_name)
role_response

Future work should focus on programmatically (using `boto3`, [1](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/iam.html#IAM.Client.create_role)) creating such a role, using the IAM policy found in the **Add Additional Amazon S3 Permissions to a SageMaker Execution Role** section from [here](https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-roles.html#sagemaker-roles-create-execution-role).

## Create EC2 Security Group

Get the subnet ID

In [None]:
def get_subnet_ids(aws_region: str, filters: Dict) -> List[Dict[str, str]]:
    """Describe the EC2 subnets matching the given filters.

    Args:
        aws_region: Region in which the EC2 client is created.
        filters: Keyword arguments forwarded verbatim to
            ``describe_subnets`` (e.g. ``{"Filters": [...]}``).

    Returns:
        One dict per subnet in the response, with the keys ``id``,
        ``availability_zone`` and ``state``.
    """
    client = boto3.client("ec2", region_name=aws_region)
    subnet_list = client.describe_subnets(**filters)
    return [
        {
            "id": sn["SubnetId"],
            "availability_zone": sn["AvailabilityZone"],
            "state": sn["State"],
        }
        for sn in subnet_list["Subnets"]
    ]

In [None]:
%%time
# Use the first VPC returned for the region and collect the subnets that
# belong to it
vpcs_list = ec2h.list_vpcs(aws_region)
vpc_id = vpcs_list[0]["VpcId"]
subnet_filters = dict(Filters=[{"Name": "vpc-id", "Values": [vpc_id]}])
subnet_ids = get_subnet_ids(aws_region, subnet_filters)

Create the security group

In [None]:
%%time
# Create the security group inside the selected VPC and tag it at creation
# time; the Name tag is what a later cell filters on to find the group ID.
ec2_resource = boto3.resource("ec2", region_name=aws_region)
security_group_creation_response = ec2_resource.create_security_group(
    GroupName=sg_group_name,
    Description=sg_group_desc,
    VpcId=vpc_id,
    TagSpecifications=[dict(ResourceType="security-group", Tags=sg_group_tags)],
)
security_group_creation_response

## SageMaker

### Create Lifecycle Config

In [None]:
def get_tag(env_vars: List[str], env_var_names: List[str]) -> List[str]:
    """Build the two shell fragments used by the lifecycle script.

    Args:
        env_vars: Shell variable names. Each one gets a ``NAME=...`` line in
            the parameters fragment and a tag-lookup/export pair in the tags
            fragment.
        env_var_names: Strings written on the right-hand side of the
            ``NAME=...`` lines, paired positionally with ``env_vars``.

    Returns:
        A two-element list: the ``# PARAMETERS`` fragment followed by the
        tag-lookup fragment, both stripped of surrounding whitespace.
    """
    tag_blocks = []
    for name in env_vars:
        lookup_cmd = (
            "TAG=$(aws sagemaker list-tags --resource-arn $NOTEBOOK_ARN  | "
            f"""jq -r --arg {name} "${name}" .'Tags[] | select(.Key == ${name}).Value'"""
            " --raw-output)"
        )
        export_cmd = f'echo "export ${name}=$TAG" >> /etc/profile.d/jupyter-env.sh'
        tag_blocks.append(f"{lookup_cmd}\n{export_cmd}\n\n")
    tags_section = ("\n" + "".join(tag_blocks)).strip()

    param_lines = [
        f"{name}={value}\n" for name, value in zip(env_vars, env_var_names)
    ]
    params_section = ("# PARAMETERS\n" + "".join(param_lines)).strip()

    return [params_section, tags_section]


def render_lifecycle_config_create_script(
    pypackages_list: List[str],
    env_vars: List[str],
) -> str:
    """Render the notebook on-create shell script and return it base64-encoded.

    Args:
        pypackages_list: Accepted for interface compatibility; it is not
            currently used in the generated script.
        env_vars: Names of environment variables. Their current values are
            read with ``os.getenv`` and handed to ``get_tag`` together with
            the names.

    Returns:
        The script, base64-encoded and decoded to an ASCII ``str``.

    Raises:
        ValueError: If any variable in ``env_vars`` is not set, since the
            literal text ``None`` would otherwise be written into the script.
    """
    # NOTE(review): the variable values (AWS credentials in this notebook's
    # usage) are embedded in the script; base64 is an encoding, not
    # encryption - confirm this exposure is acceptable.
    env_var_values = [os.getenv(ev_name) for ev_name in env_vars]
    missing = [
        ev_name
        for ev_name, ev_value in zip(env_vars, env_var_values)
        if ev_value is None
    ]
    if missing:
        raise ValueError(
            "Environment variable(s) not set: " + ", ".join(missing)
        )

    vars_defns, tags_set_str = get_tag(env_vars, env_var_values)
    arn_str = (
        "NOTEBOOK_ARN=$(jq '.ResourceArn' /opt/ml/metadata/resource-metadata.json --raw-output)\n"
        f"touch /etc/profile.d/jupyter-env.sh\n\n{tags_set_str}\n\ninitctl restart "
        "jupyter-server --no-wait"
    )
    # The trailing newline and four spaces reproduce the original template
    # exactly, so the encoded payload is unchanged.
    script = f"#!/bin/bash\n\nset -e\n\n{vars_defns}\n\n{arn_str}\n    "
    b64_encoded = base64.b64encode(script.encode())
    return b64_encoded.decode("ascii")


def create_sagemaker_nb_lifecycle_config(
    nb_lifecycle_name: str, aws_region: str, on_create_script: str
) -> Dict:
    """Create a SageMaker notebook lifecycle configuration with an OnCreate hook.

    Args:
        nb_lifecycle_name: Name given to the lifecycle configuration.
        aws_region: Region in which the SageMaker client is created.
        on_create_script: Script content passed as the single ``OnCreate``
            entry (this notebook passes a base64-encoded shell script).

    Returns:
        The response returned by
        ``create_notebook_instance_lifecycle_config``.
    """
    client = boto3.client("sagemaker", region_name=aws_region)
    life_cycle_creation_response = client.create_notebook_instance_lifecycle_config(
        NotebookInstanceLifecycleConfigName=nb_lifecycle_name,
        OnCreate=[{"Content": on_create_script}],
    )
    return life_cycle_creation_response

In [None]:
%%time
# Render the base64-encoded on-create script from the listed environment
# variables
lc_script = render_lifecycle_config_create_script(
    pypackages_list=["scipy"],
    env_vars=["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_REGION"],
)
# Register the script as the OnCreate hook of a new lifecycle configuration;
# the response is shown as the cell output
life_cycle_creation_response = create_sagemaker_nb_lifecycle_config(
    nb_lifecycle_name, aws_region, lc_script
)
life_cycle_creation_response

### Create Notebook Instance

Get the security group ID

In [None]:
%%time
# Find the security group(s) whose Name tag equals sg_group_name and keep
# only their IDs
sg_filter = {"Filters": [{"Name": "tag:Name", "Values": [sg_group_name]}]}
security_groups = ec2_resource.security_groups.filter(**sg_filter)
sg_group_list = [sg.id for sg in security_groups]

Create the sagemaker notebook instance

In [None]:
def create_sagemaker_nb_instance(
    nb_instance_name: str,
    nb_instance_type: str,
    sg_group_id: str,
    subnet_id: str,
    nb_lifecycle_name: str,
    iam_role_arn: str,
    aws_region: str,
    nb_instance_tags: List[Dict[str, str]],
) -> Dict:
    """Create a SageMaker notebook instance.

    Args:
        nb_instance_name: Name of the notebook instance.
        nb_instance_type: Instance type, e.g. ``"ml.t3.xlarge"``.
        sg_group_id: ID of the single security group attached to the instance.
        subnet_id: ID of the subnet the instance is placed in.
        nb_lifecycle_name: Name of the lifecycle configuration to attach.
        iam_role_arn: ARN of the IAM role passed as ``RoleArn``.
        aws_region: Region in which the SageMaker client is created.
        nb_instance_tags: Tags as a list of ``{"Key": ..., "Value": ...}``
            dicts.

    Returns:
        The response returned by ``create_notebook_instance``.
    """
    client = boto3.client("sagemaker", region_name=aws_region)
    sgm_creation_response = client.create_notebook_instance(
        NotebookInstanceName=nb_instance_name,
        InstanceType=nb_instance_type,
        SubnetId=subnet_id,
        SecurityGroupIds=[sg_group_id],
        RoleArn=iam_role_arn,
        Tags=nb_instance_tags,
        LifecycleConfigName=nb_lifecycle_name,
        DirectInternetAccess="Enabled",
        VolumeSizeInGB=20,
        PlatformIdentifier="notebook-al2-v1",
        RootAccess="Enabled",
    )
    # Return the local response; the original returned an undefined name
    # (sg_nb_creation_response), which raised NameError on a fresh kernel.
    return sgm_creation_response

In [None]:
%%time
# Keyword arguments keep each value bound to the intended parameter; the
# original positional call omitted nb_lifecycle_name, which shifted every
# later argument and raised TypeError for the missing one.
sg_nb_creation_response = create_sagemaker_nb_instance(
    nb_instance_name=nb_instance_name,
    nb_instance_type=nb_instance_type,
    sg_group_id=sg_group_list[0],
    subnet_id=subnet_ids[0]["id"],
    nb_lifecycle_name=nb_lifecycle_name,
    iam_role_arn=role_response["Role"]["Arn"],
    aws_region=aws_region,
    nb_instance_tags=nb_instance_tags,
)
sg_nb_creation_response