# Building Your Own Algorithm Container

This notebook demonstrates how to package a custom algorithm for Amazon SageMaker using a decision tree classifier for the Iris dataset.

## Install Required Package

In [None]:
!pip install sagemaker-studio-image-build

## Build and Push Container

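First, ensure that the IAM role used for the build (`Sagemaker_build_role` in the build cell below) has a trust policy that allows the CodeBuild service to assume it: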

In [None]:
# Trust Policy for IAM Role
# Reference document only: evaluated as a cell, this is a plain dict literal that is
# displayed as the cell output; it does not create or modify anything in IAM.
# The single statement allows the `codebuild.amazonaws.com` service principal to call
# `sts:AssumeRole` on the role the policy is attached to.
# NOTE(review): presumably this belongs on the role passed to `sm-docker --role` — confirm.

{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Principal": {
                "Service": "codebuild.amazonaws.com"
            },
            "Action": "sts:AssumeRole"
        }
    ]
}

In [None]:
%%sh

# Build and push container
# Stop at the first failing command or unset variable, so that a failed `cd`
# can never lead to `sm-docker build .` running from the wrong directory.
set -eu

algorithm_name=custom-algorithm-sklearn
rolename="Sagemaker_build_role"
bucket="sagemaker-demo-bpartner"

cd container

# The train/serve entry points are run as programs, so they need the executable bit.
chmod +x decision_tree/train decision_tree/serve

# Repository name and tag for the built image.
fullname="${algorithm_name}:latest"

# Quote every expansion so values containing spaces or glob characters are passed intact.
sm-docker build . --repository "${fullname}" --role "${rolename}" --bucket "${bucket}"

## Set Up Environment

In [None]:
# Imports first, then the notebook-wide settings derived from them.
import os
import re

import boto3
import numpy as np
import pandas as pd
from sagemaker import get_execution_role

# S3 key prefix for this demo.
prefix = "DEMO-scikit-byo-iris"

# IAM role obtained from the SageMaker SDK; handed to the estimator later on.
role = get_execution_role()

In [None]:
# Create SageMaker session
import sagemaker as sage
from time import gmtime, strftime  # NOTE(review): neither name is used in the visible cells

# Shared session object; later cells use it for the default bucket lookup and pass it
# to the estimator.
sess = sage.Session()

In [None]:
# Define data location and image URI
# S3 location of the training input consumed by `estimator.fit`.
data_location = 's3://sagemaker-demo-bpartner/input'

# Derive the ECR image URI from the current session instead of hard-coding an
# account ID and region: the build cell pushes `custom-algorithm-sklearn:latest`
# to the caller's own account/region, so the URI must point there as well.
account_id = sess.boto_session.client("sts").get_caller_identity()["Account"]
region = sess.boto_session.region_name
image = f"{account_id}.dkr.ecr.{region}.amazonaws.com/custom-algorithm-sklearn:latest"

## Create and Train Estimator

In [None]:
# Hyperparameters forwarded to the training container.
param_dict = {
    "max_leaf_nodes": 3,
    "random_state": 0,
    "criterion": "gini",
}

# Use the SageMaker Python SDK v2 keyword names (`image_uri`, `instance_count`,
# `instance_type`, `use_spot_instances`, `max_run`, `max_wait`). The notebook already
# depends on v2 (`sagemaker.serializers`), where the old `train_*` names are deprecated.
estimator = sage.estimator.Estimator(
    image_uri=image,
    role=role,
    instance_count=1,
    instance_type="ml.c4.2xlarge",
    output_path="s3://{}/output".format(sess.default_bucket()),
    sagemaker_session=sess,
    hyperparameters=param_dict,
    use_spot_instances=True,
    max_run=3600,   # seconds of training time allowed
    max_wait=7200,  # seconds allowed in total, including waiting for spot capacity
)

# Launch the training job on the data under `data_location` and stream its logs.
estimator.fit(data_location, logs=True)

## Deploy Model and Create Endpoint

In [None]:
from sagemaker.serializers import CSVSerializer

# Deploy the trained model to one ml.m4.xlarge instance; request payloads are
# serialized with CSVSerializer.
predictor = estimator.deploy(
    initial_instance_count=1,
    instance_type="ml.m4.xlarge",
    serializer=CSVSerializer(),
)

## Test Predictions

In [None]:
import itertools

# Read the sample payload; the file is parsed without a header row.
shape = pd.read_csv("testpayload.csv", header=None)

# Create test data
# Offsets 0..4 are added to each of the bases 0 and 10; the final index is
# dropped when selecting rows.
a = list(range(0, 20, 10))
b = list(range(5))
indices = list(map(sum, itertools.product(a, b)))
test_data = shape.iloc[indices[:-1]]

# Get predictions
response = predictor.predict(test_data.values)
print(response.decode("utf-8"))

## Run Batch Transform

In [None]:
import sagemaker
import boto3
import time
from datetime import datetime

# Define locations
bucket = 'sagemaker-demo-bpartner'
input_prefix = 'batch-input'
output_prefix = 'batch-output'
input_location = 's3://{}/{}'.format(bucket, input_prefix)
output_location = 's3://{}/{}'.format(bucket, output_prefix)

# Upload test data
s3_client = boto3.client('s3')
s3_client.upload_file(
    Filename='testpayload.csv',
    Bucket=bucket,
    Key='{}/testpayload.csv'.format(input_prefix),
)

# Create transformer
# One ml.m4.xlarge instance; results are written under `output_location`.
transformer = estimator.transformer(
    instance_count=1,
    instance_type='ml.m4.xlarge',
    strategy='SingleRecord',
    output_path=output_location,
)

# Start transform job
# The input is CSV, split line by line.
transformer.transform(
    data=input_location,
    content_type='text/csv',
    split_type='Line',
)

## Clean Up

In [None]:
# Tear down the hosted endpoint through the predictor itself: this also removes
# the endpoint configuration (left behind by `Session.delete_endpoint`) and avoids
# the deprecated `predictor.endpoint` attribute.
predictor.delete_endpoint()