# Usando o algoritmo customizado no SageMaker

## Criando o setup do ambiente

In [2]:
# S3 key prefix under which the example data is uploaded (passed as key_prefix to sess.upload_data)
prefix = 'martinig-sagemaker'

# Imports
import boto3
import re
import os
import numpy as np
import pandas as pd
import sagemaker as sage
from sagemaker import get_execution_role
from time import gmtime, strftime

# SageMaker session reused by the upload, training and endpoint-deletion cells
sess = sage.Session()

# IAM role returned by get_execution_role(); handed to the Estimator in the training cell
role = get_execution_role()

## Efetuando o upload do dado de exemplo

In [3]:
# Local directory that holds the example data
WORK_DIRECTORY = 'data'
# Upload that directory through the session, using `prefix` as the key prefix;
# the returned location is later passed to tree.fit as the training input.
data_location = sess.upload_data(WORK_DIRECTORY, key_prefix=prefix)

## Criando o Estimator do SageMaker e inicializando o treino

In [4]:
# Look up the region and the caller's AWS account id through the session's boto3 session.
boto_session = sess.boto_session
sts_client = boto_session.client('sts')
account = sts_client.get_caller_identity()['Account']
region = boto_session.region_name

# URI of the custom container image in the account's ECR registry
# (repository `scikittest`, tag `latest`).
image = '{}.dkr.ecr.{}.amazonaws.com/scikittest:latest'.format(account, region)

# Training output is written under output/ in the session's default bucket.
output_path = "s3://{}/output".format(sess.default_bucket())

tree = sage.estimator.Estimator(
    image,
    role,
    1,                # instance count (positional, per the Estimator signature)
    'ml.c5.2xlarge',  # instance type
    output_path=output_path,
    sagemaker_session=sess,
)

# Start the training job on the data uploaded earlier.
tree.fit(data_location)

2020-05-31 23:34:52 Starting - Starting the training job...
2020-05-31 23:34:54 Starting - Launching requested ML instances......
2020-05-31 23:36:06 Starting - Preparing the instances for training...
2020-05-31 23:36:38 Downloading - Downloading input data...
2020-05-31 23:37:17 Training - Training image download completed. Training in progress..[34mStarting the training.[0m
[34mTraining complete.[0m

2020-05-31 23:37:29 Uploading - Uploading generated training model
2020-05-31 23:37:29 Completed - Training job completed
Training seconds: 51
Billable seconds: 51


## Efetuando o deploy do modelo

In [5]:
from sagemaker.predictor import csv_serializer
# Deploy the trained estimator behind an endpoint (1 x ml.m4.xlarge, per the
# positional arguments of deploy); request payloads are serialized with csv_serializer.
predictor = tree.deploy(1, 'ml.m4.xlarge', serializer=csv_serializer)

-------------!

## Realizando algumas inferências

In [6]:
# Read the example CSV with header=None, so the columns get integer labels (0, 1, 2, ...)
shape=pd.read_csv("data/iris.csv", header=None)
# Show 3 randomly sampled rows (no random_state is set, so the rows differ between runs)
shape.sample(3)

Unnamed: 0,0,1,2,3,4
124,virginica,6.7,3.3,5.7,2.1
43,setosa,5.0,3.5,1.6,0.6
68,versicolor,6.2,2.2,4.5,1.5


In [7]:
# Remove the label column so only the feature columns remain. The CSV was read
# with header=None, so the label column carries the integer label 0.
# Dropping by label with errors="ignore" and rebinding (instead of dropping the
# first positional column in place) keeps this cell idempotent: re-running it
# is a no-op rather than silently removing a feature column.
shape = shape.drop(columns=[0], errors="ignore")
shape.sample(3)

Unnamed: 0,1,2,3,4
47,4.6,3.2,1.4,0.2
109,7.2,3.6,6.1,2.5
135,7.7,3.0,6.1,2.3


In [8]:
import itertools

# Three block starts (0, 50, 100) combined with ten offsets (40..49) give the
# row positions 40-49, 90-99 and 140-149.
# NOTE(review): presumably the CSV holds 50 consecutive rows per class — confirm.
block_starts = range(0, 150, 50)
row_offsets = range(40, 50)
indices = [
    start + offset
    for start, offset in itertools.product(block_starts, row_offsets)
]

# Select those rows by position, leaving out the last index of the list.
test_data = shape.iloc[indices[:-1]]

In [9]:
# Send the selected feature rows to the endpoint, then print the decoded response.
response_bytes = predictor.predict(test_data.values)
print(response_bytes.decode('utf-8'))

setosa
setosa
setosa
setosa
setosa
setosa
setosa
setosa
setosa
setosa
versicolor
versicolor
versicolor
versicolor
versicolor
versicolor
versicolor
versicolor
versicolor
versicolor
virginica
virginica
virginica
virginica
virginica
virginica
virginica
virginica
virginica



## Excluindo o endpoint de inferência

In [10]:
# Delete the inference endpoint that tree.deploy created
sess.delete_endpoint(predictor.endpoint)