# SageMaker model deployment as CI/CD pipeline
This notebook demonstrates how to use SageMaker Project template for CI/CD model deployment.

## Load packages and get environment configuration 

In [None]:
%matplotlib inline
import pandas as pd
import numpy as np
import sagemaker
import json
import boto3
from sagemaker import get_execution_role
import sagemaker.session
from sklearn.model_selection import train_test_split
from sklearn import datasets

sm = boto3.client("sagemaker")
ssm = boto3.client("ssm")

def get_environment(project_name, ssm_params):
    r = sm.describe_domain(
            DomainId=sm.describe_project(
                ProjectName=project_name
                )["CreatedBy"]["DomainId"]
        )
    del r["ResponseMetadata"]
    del r["CreationTime"]
    del r["LastModifiedTime"]
    r = {**r, **r["DefaultUserSettings"]}
    del r["DefaultUserSettings"]

    i = {
        **r,
        **{t["Key"]:t["Value"] 
            for t in sm.list_tags(ResourceArn=r["DomainArn"])["Tags"] 
            if t["Key"] in ["EnvironmentName", "EnvironmentType"]}
    }

    for p in ssm_params:
        try:
            i[p["VariableName"]] = ssm.get_parameter(Name=f"{i['EnvironmentName']}-{i['EnvironmentType']}-{p['ParameterName']}")["Parameter"]["Value"]
        except:
            i[p["VariableName"]] = ""

    return i

def get_session(region, default_bucket):
    """Gets the sagemaker session based on the region.

    Args:
        region: the aws region to start the session
        default_bucket: the bucket to use for storing the artifacts

    Returns:
        sagemaker.session.Session instance
    """

    boto_session = boto3.Session(region_name=region)

    sagemaker_client = boto_session.client("sagemaker")
    runtime_client = boto_session.client("sagemaker-runtime")
    return sagemaker.session.Session(
        boto_session=boto_session,
        sagemaker_client=sagemaker_client,
        sagemaker_runtime_client=runtime_client,
        default_bucket=default_bucket,
    )

In [None]:
# Set to the specific SageMaker project name
project_name = 

# Dynamically load environmental SSM parameters - provide the list of the variables to load from SSM parameter store
ssm_parameters = [
    {"VariableName":"DataBucketName", "ParameterName":"data-bucket-name"},
    {"VariableName":"ModelBucketName", "ParameterName":"model-bucket-name"},
    {"VariableName":"S3VPCEId", "ParameterName":"s3-vpce-id"},
    {"VariableName":"S3KmsKeyId", "ParameterName":"kms-s3-key-arn"},
    {"VariableName":"PipelineExecutionRole", "ParameterName":"sm-pipeline-execution-role-arn"},
    {"VariableName":"ModelExecutionRole", "ParameterName":"sm-model-execution-role-name"},
    {"VariableName":"OUStagingId", "ParameterName":"ou-staging-id"},
    {"VariableName":"OUProdId", "ParameterName":"ou-prod-id"},
]

env_data = get_environment(project_name=project_name)
print(f"Environment data:\n{json.dumps(env_data, indent=2)}")

In [None]:
# Create SageMaker session
sagemaker_session = get_session(boto3.Session().region_name, env_data["DataBucketName"])

region = boto3.Session().region_name
pipeline_role = env_data["PipelineExecutionRole"]
processing_role = env_data["ExecutionRole"]
model_execution_role = env_data["ModelExecutionRole"]
training_role = env_data["ExecutionRole"]
data_bucket = sagemaker_session.default_bucket()
model_bucket = env_data["ModelBucketName"]

print(f"SageMaker version: {sagemaker.__version__}")
print(f"Region: {region}")
print(f"Pipeline execution role: {pipeline_role}")
print(f"Processing role: {processing_role}")
print(f"Training role: {training_role}")
print(f"Model execution role: {model_execution_role}")
print(f"Pipeline data bucket: {data_bucket}")


project_id = sm_client.describe_project(ProjectName=project_name)['ProjectId']
# Replace the model_package_group_name with <project_name>, <project_id> with Model Build Train MLOps pipeline
model_package_group_name = f"{project_name}-{project_id}"
print(f"Model package group name: {model_package_group_name}")

assert(len(project_name) <= 15 ) # the project name should not have more than 15 chars

# Prefix for S3 objects
prefix=f"{project_name}-{project_id}"

## Setup the network config

In [1]:
network_config = NetworkConfig(
        enable_network_isolation=False, 
        security_group_ids=env_data["SecurityGroups"],
        subnets=env_data["SubnetIds"],
        encrypt_inter_container_traffic=True)

NameError: name 'NetworkConfig' is not defined

## Load the dataset
Load the [iris dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_iris.html) from `sklearn` module. The iris dataset is a classic and very easy multi-class classification dataset.

In [None]:
iris = datasets.load_iris()
dataset = np.insert(iris.data, 0, iris.target,axis=1)

df = pd.DataFrame(data=dataset, columns=['iris_id'] + iris.feature_names)
df['species'] = df['iris_id'].map(lambda x: 'setosa' if x == 0 else 'versicolor' if x == 1 else 'virginica')

df.head()

### Upload the dataset to an S3 bucket

In [None]:
X=iris.data
y=iris.target

# Split the dataset into train and test parts
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42, stratify=y)
yX_train = np.column_stack((y_train, X_train))
yX_test = np.column_stack((y_test, X_test))
np.savetxt("iris_train.csv", yX_train, delimiter=",", fmt='%0.3f')
np.savetxt("iris_test.csv", yX_test, delimiter=",", fmt='%0.3f')

# Upload the dataset to an S3 bucket
input_train = sagemaker_session.upload_data(path='iris_train.csv', key_prefix=f'{prefix}/datasets/iris/data')
input_test = sagemaker_session.upload_data(path='iris_test.csv', key_prefix=f'{prefix}/datasets/iris/data')

print(input_train)
print(input_test)

### Creating the ML Pipeline

#### Pipeline input parameters

In [None]:
from sagemaker.workflow.parameters import (
    ParameterInteger,
    ParameterString,
)

training_instance_type = ParameterString(
    name="TrainingInstanceType",
    default_value="ml.m5.xlarge"
)
training_instance_count = ParameterInteger(
    name="TrainingInstanceCount",
    default_value=1
)
input_train_data = ParameterString(
    name="InputDataTrain",
    default_value=input_train,
)
input_test_data = ParameterString(
    name="InputDataTest",
    default_value=input_test,
)

#### Estimator that will run the training process

In [None]:
from sagemaker.estimator import Estimator
import time

model_path = f"s3://{model_bucket}/{project_name}-{project_id}/iris-{time.strftime('%Y-%m-%d-%H-%M-%S')}"

image_uri = sagemaker.image_uris.retrieve(
    framework="xgboost", region=region, version="1.0-1", py_version="py3", 
    instance_type=training_instance_type,
)
xgb_train = Estimator(
    image_uri=image_uri,
    instance_type=training_instance_type,
    instance_count=training_instance_count,
    output_path=model_path,
    sagemaker_session=sagemaker_session,
    role=role,
)
xgb_train.set_hyperparameters(
    eta=0.1,
    max_depth=10,
    gamma=4,
    num_class=len(np.unique(y)),
    alpha=10,
    min_child_weight=6,
    silent=0,
    objective='multi:softmax',
    num_round=30
)

### Training step

In [None]:
from sagemaker.inputs import TrainingInput
from sagemaker.workflow.steps import TrainingStep

step_train = TrainingStep(
    name="IrisTrain",
    estimator=xgb_train,
    inputs={
        "train": TrainingInput(s3_data=input_train_data, content_type="text/csv"),
        "validation": TrainingInput(s3_data=input_test_data, content_type="text/csv"
        )
    },
)

### Model register step

In [None]:
from sagemaker.workflow.step_collections import RegisterModel

# NOTE: model_approval_status is not available as arg in service dsl currently
step_register = RegisterModel(
    name="IrisRegisterModel",
    estimator=xgb_train,
    model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
    content_types=["text/csv"],
    response_types=["text/csv"],
    inference_instances=["ml.t2.medium", "ml.m5.xlarge"],
    transform_instances=["ml.m5.xlarge"],
    model_package_group_name=model_package_group_name
)

### Create a pipeline

In [None]:
from botocore.exceptions import ClientError, ValidationError
from sagemaker.workflow.pipeline import Pipeline

# NOTE:
# condition steps have issues in service so we go straight to step_register
pipeline_name = "IrisTrainRegister-%s" % ts
pipeline = Pipeline(
    name=pipeline_name,
    parameters=[
        training_instance_type,
        training_instance_count,        
        input_train_data,
        input_test_data
    ],
    steps=[step_train, step_register],
    sagemaker_session=sagemaker_session,
)

try:
    response = pipeline.create(role_arn=role)
except ClientError as e:
    error = e.response["Error"]
    if error["Code"] == "ValidationError" and "Pipeline names must be unique within" in error["Message"]:
        print(error["Message"])
        response = pipeline.describe()
    else:
        raise

pipeline_arn = response["PipelineArn"]
sm_client.add_tags(
    ResourceArn=pipeline_arn,
    Tags=[
        {'Key': 'sagemaker:project-name', 'Value': project_name },
        {'Key': 'sagemaker:project-id', 'Value': project_id }
    ]
)
print(pipeline_arn)

### Execute the pipeline

In [None]:
start_response = pipeline.start(parameters={
    "TrainingInstanceCount": "1"
})

pipeline_execution_arn = start_response.arn
print(pipeline_execution_arn)

while True:
    resp = sm_client.describe_pipeline_execution(PipelineExecutionArn=pipeline_execution_arn)
    if resp['PipelineExecutionStatus'] == 'Executing':
        print('Running...')
    else:
        print(resp['PipelineExecutionStatus'], pipeline_execution_arn)
        break
    time.sleep(15)

### Finally, approve the model to kick-off the deployment process

In [None]:
# list all packages and select the latest one
packages = sm_client.list_model_packages(ModelPackageGroupName=model_package_group_name)['ModelPackageSummaryList']
packages = sorted(packages, key=lambda x: x['CreationTime'], reverse=True)

latest_model_package_arn = packages[0]['ModelPackageArn']

In [None]:
model_package_update_response = sm_client.update_model_package(
    ModelPackageArn=latest_model_package_arn,
    ModelApprovalStatus="Approved",
)