# Building the Docker image and pushing it to ECR

In [None]:
# IPython shell escape: runs build_and_push.sh with `sh` from the notebook's
# working directory. Presumably the script builds the image and pushes it to
# ECR, as the heading says — its contents are not visible from this notebook.
!sh build_and_push.sh

# Initializing Notebook

In [None]:
import boto3
import re
import sagemaker as sage
from time import gmtime, strftime
import os
import numpy as np
import pandas as pd
from sagemaker import get_execution_role

# IAM role of the notebook; it is handed to the estimator and to create_model below.
role = get_execution_role()
# NOTE(review): `prefix` is not referenced by any cell visible here — confirm it is still needed.
prefix = 'random-forest-data-mart'

# One SageMaker session is reused by the cells below; its boto session supplies
# the account id and region needed to address the ECR image.
sess = sage.Session()
boto_session = sess.boto_session
caller_identity = boto_session.client('sts').get_caller_identity()
account = caller_identity['Account']
region = boto_session.region_name

# S3 object passed to tree.fit() as the training input.
data_location = 's3://slytherins-test/Train.csv'

# ECR URI of the custom image (presumably the one pushed by build_and_push.sh).
image = '{}.dkr.ecr.{}.amazonaws.com/sagemaker-random-forest:latest'.format(
    account, region)

# Initialize the Estimator for the Custom Container

In [None]:
# Regexes that pull the train/validation error values out of the training
# logs so they are published as metrics for the job.
metric_definitions = [
    {'Name': 'train:error', 'Regex': 'Train_error=(.*?);'},
    {'Name': 'validation:error', 'Regex': 'Valid_error=(.*?);'},
]

# Model artifacts are written under the session's default bucket.
output_path = "s3://{}/output".format(sess.default_bucket())

# Estimator backed by the custom image. The positional `1` and 'ml.m4.xlarge'
# are presumably the training instance count and type (SDK v1 argument
# order) — confirm against the installed SDK version.
tree = sage.estimator.Estimator(
    image,
    role,
    1,
    'ml.m4.xlarge',
    output_path=output_path,
    sagemaker_session=sess,
    enable_sagemaker_metrics=True,
    metric_definitions=metric_definitions,
)

# Train the Model

In [None]:
# Start a training job on the estimator using the S3 object in data_location
# as its input.
tree.fit(data_location)

# Deploy the Model

In [None]:
# NOTE(review): `sagemaker.predictor.csv_serializer` is the SageMaker Python
# SDK v1 import path — confirm the installed SDK version still provides it.
from sagemaker.predictor import csv_serializer
# Deploy the trained estimator and keep the returned predictor. The positional
# arguments are presumably (instance count, instance type) — confirm. Request
# payloads are serialized with csv_serializer.
predictor = tree.deploy(1, 'ml.m4.xlarge', serializer=csv_serializer)

# Read Test Data

In [None]:
# header=None makes pandas treat the first line of the file as data and number
# the columns instead of reading names from it.
test_data = pd.read_csv('test_data.csv', header=None)
# Last expression in the cell, so the notebook displays the first rows.
test_data.head()

# Make Predictions

In [None]:
# Send all rows of test_data to the endpoint in a single predict() call. The
# result is decoded as UTF-8, so predict() is expected to return bytes here.
predictions = predictor.predict(test_data.values).decode('utf-8')

# Batch Transform

In [None]:
import boto3

# Low-level SageMaker client; also used further down to create and poll the
# batch transform job.
client = boto3.client('sagemaker')

# Container definition: the custom image plus a trained model artifact.
# NOTE(review): ModelDataUrl is pinned to the output of one specific training
# job in one account/region — confirm this is the artifact you intend to serve.
primary_container = dict(
    Image=image,
    ModelDataUrl='s3://sagemaker-us-east-2-809912564797/output/sagemaker-random-forest-2020-06-07-07-27-23-190/output/model.tar.gz',
)

# Register the model under the name the transform request refers to.
create_model_response = client.create_model(
    ModelName='Random-Forest-BigMart',
    ExecutionRoleArn=role,
    PrimaryContainer=primary_container,
)

In [None]:
# Show the ARN returned by create_model for the newly registered model.
print(create_model_response['ModelArn'])

In [None]:
import time
from time import gmtime, strftime

# Timestamped label. It is only used to make the S3 output prefix unique per
# run; it is NOT the name the transform job is submitted under.
batch_job_name = 'RF-Batch-Transform-' + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
input_location = 's3://slytherins-test/test_data.csv'
output_location = 's3://{}/{}/output/{}'.format('slytherins-test', 'RF-Batch-Transform', batch_job_name)

# Request body for create_transform_job.
# NOTE(review): TransformJobName is a fixed literal, and the status cell below
# polls the same literal. Transform job names must be unique per account and
# region, so re-running this cell requires changing the name in both places.
request = {
    "TransformJobName": 'Random-Forest-BigMart-1',
    "ModelName": 'Random-Forest-BigMart',
    "TransformOutput": {
        "S3OutputPath": output_location,
        "Accept": "text/csv",
        # Join the per-record outputs with newlines in the result object.
        "AssembleWith": "Line"
    },
    "TransformInput": {
        "DataSource": {
            "S3DataSource": {
                "S3DataType": "S3Prefix",
                "S3Uri": input_location
            }
        },
        "ContentType": "text/csv",
        # Split the input on newlines so each line is treated as one record.
        "SplitType": "Line",
        "CompressionType": "None"
    },
    "TransformResources": {
        "InstanceType": "ml.m4.xlarge",
        "InstanceCount": 1
    }
}

client.create_transform_job(**request)
# Report the name that was actually submitted. The original printed
# batch_job_name, which differs from the job's real name and would send anyone
# looking the job up to a name that does not exist.
print("Created Transform job with name: ", request["TransformJobName"])

# Getting the Status of Batch Transform Job

In [None]:
# Wait until the job finishes.
# The waiter blocks on the hard-coded job name. The `finally` body runs whether
# the wait returned normally or raised, so the final status is always fetched
# and printed.
try:
    client.get_waiter('transform_job_completed_or_stopped').wait(TransformJobName='Random-Forest-BigMart-1')
finally:
    # Re-query the job so the status printed comes from the service itself.
    response = client.describe_transform_job(TransformJobName='Random-Forest-BigMart-1')
    status = response['TransformJobStatus']
    print("Transform job ended with status: " + status) 
    if status == 'Failed':
        message =response['FailureReason']
        print('Transform failed with the following error: {}'.format(message))
        # Raising inside `finally` replaces any exception the waiter raised,
        # so callers see this generic Exception for a failed job.
        raise Exception('Transform job failed')