# Base de dados MNIST

- Imagens: https://github.com/myleott/mnist_png

In [None]:
import json
import shutil
import tarfile
import urllib
import urllib.request
from pathlib import Path

import mxnet as mx
import numpy as np

In [None]:
# Unpack the MNIST PNG archive into the current directory. The `with` block
# closes the archive even when extraction fails part-way through.
with tarfile.open('mnist_png.tar.gz') as base:
    base.extractall('./')

In [4]:
# https://awstip.com/make-recordio-files-for-training-on-amazon-sagemaker-a5fe2e6c22eb
# Download the im2rec.py helper next to the notebook so the following cells can
# run it. `urllib.request` is a submodule: it has to be imported explicitly
# (see the imports cell), a bare `import urllib` does not guarantee it is loaded.
# NOTE(review): the URL tracks the `master` branch, so the downloaded script may
# change between runs -- consider pinning a release tag.
script_url = "https://raw.githubusercontent.com/apache/incubator-mxnet/master/tools/im2rec.py"
script_destino = 'im2rec.py'
urllib.request.urlretrieve(script_url, script_destino)

('im2rec.py', <http.client.HTTPMessage at 0x7f0d70115210>)

In [3]:
# lst -> rec
!python im2rec.py --list --recursive train_data_mnist mnist_png/training

0 0
0/.ipynb_checkpoints 1
1 2
2 3
3 4
4 5
5 6
6 7
7 8
8 9
9 10


In [4]:
!python im2rec.py --list --recursive test_data_mnist mnist_png/testing

0 0
1 1
1/.ipynb_checkpoints 2
2 3
3 4
4 5
5 6
6 7
7 8
8 9
9 10


In [5]:
# Pack the images listed in train_data_mnist.lst into a RecordIO (.rec) file,
# using two worker threads.
!python im2rec.py --num-thread 2 train_data_mnist mnist_png/training

Creating .rec file from /root/train_data_mnist.lst in /root
time: 0.0287168025970459  count: 0
time: 1.3190088272094727  count: 1000
time: 1.2087807655334473  count: 2000
time: 1.2867724895477295  count: 3000
time: 1.1990466117858887  count: 4000
time: 1.5538105964660645  count: 5000
time: 1.200986623764038  count: 6000
time: 1.220735788345337  count: 7000
time: 1.2321898937225342  count: 8000
time: 1.2060832977294922  count: 9000
time: 1.1984918117523193  count: 10000
time: 1.2423968315124512  count: 11000
time: 1.221233606338501  count: 12000
time: 1.1989657878875732  count: 13000
time: 1.1868062019348145  count: 14000
time: 1.2001667022705078  count: 15000
time: 1.217623233795166  count: 16000
time: 1.2024447917938232  count: 17000
time: 1.1769509315490723  count: 18000
time: 1.1879234313964844  count: 19000
time: 1.1901576519012451  count: 20000
time: 1.1850402355194092  count: 21000
time: 1.1825652122497559  count: 22000
time: 1.1819119453430176  count: 23000
time: 1.1945388317108

In [6]:
# Pack the images listed in test_data_mnist.lst into a RecordIO (.rec) file,
# using two worker threads.
!python im2rec.py --num-thread 2 test_data_mnist mnist_png/testing

Creating .rec file from /root/test_data_mnist.lst in /root
time: 0.016930341720581055  count: 0
time: 1.858625888824463  count: 1000
time: 1.9015929698944092  count: 2000
time: 1.8985190391540527  count: 3000
time: 2.0030171871185303  count: 4000
time: 1.888801097869873  count: 5000
time: 1.8992176055908203  count: 6000
time: 2.013458251953125  count: 7000
time: 1.9226741790771484  count: 8000
time: 1.9774811267852783  count: 9000
time: 1.940140962600708  count: 10000


# Configurações do SageMaker

In [26]:
import boto3
import sagemaker
from sagemaker import get_execution_role

In [27]:
# Role returned by sagemaker.get_execution_role(); it is handed to the Estimator later.
role = get_execution_role()

# Every location used by this notebook lives under s3://<bucket>/<subpasta_dataset>/.
bucket = 'redeneural'
subpasta_dataset = 'mnist'
prefixo_s3 = 's3://' + bucket + '/' + subpasta_dataset

s3_treinamento = prefixo_s3 + '/train'
s3_teste = prefixo_s3 + '/test'
output_location = prefixo_s3 + '/output'

# Echo the resolved locations so they can be checked before training.
for rotulo, destino in (('Localização da base de treinamento: ', s3_treinamento),
                        ('Localização da base de teste: ', s3_teste),
                        ('Localização do modelo: ', output_location)):
    print(rotulo, destino)

Localização da base de treinamento:  s3://redeneural/mnist/train
Localização da base de teste:  s3://redeneural/mnist/test
Localização do modelo:  s3://redeneural/mnist/output


# Definição e treinamento da rede neural

In [28]:
# https://docs.aws.amazon.com/sagemaker/latest/dg/ecr-sa-east-1.html
# Resolve the image URI of the built-in image-classification algorithm for the
# region of the current boto3 session.
regiao_atual = boto3.Session().region_name
container = sagemaker.image_uris.retrieve(framework='image-classification',
                                          region=regiao_atual)

In [29]:
# https://sagemaker.readthedocs.io/en/stable/api/training/estimators.html
# Training-job settings gathered in one mapping, then unpacked into the Estimator.
configuracao_estimador = dict(
    image_uri=container,
    role=role,
    instance_count=1,
    instance_type='ml.p3.2xlarge',
    input_mode='File',
    output_path=output_location,
)
rede_neural = sagemaker.estimator.Estimator(**configuracao_estimador)

In [30]:
# https://docs.aws.amazon.com/sagemaker/latest/dg/IC-Hyperparameter.html
# Hyperparameters for the built-in image-classification algorithm.
# num_training_samples has to match the number of records in the training
# channel; per the hyperparameter docs linked above, a mismatch makes the
# lr_scheduler_step behaviour undefined. mnist_png/training is the standard
# 60,000-image MNIST training split (the original value of 50000 did not match).
# NOTE(review): adjust if train_data_mnist.lst has a different number of rows.
rede_neural.set_hyperparameters(num_layers = 110,
                                image_shape = '1,28,28',
                                num_classes = 10,
                                num_training_samples = 60000,
                                mini_batch_size = 128,
                                epochs = 10,
                                learning_rate = 0.001)

In [31]:
def _canal_recordio(s3_uri):
    """Build a TrainingInput for the RecordIO data stored under the S3 prefix `s3_uri`."""
    return sagemaker.inputs.TrainingInput(s3_uri,
                                          content_type='application/x-recordio',
                                          s3_data_type='S3Prefix')


# Both channels share the same content type and S3 data type.
train_data = _canal_recordio(s3_treinamento)
test_data = _canal_recordio(s3_teste)

In [32]:
# Start training with the two named data channels.
canais = {'train': train_data, 'validation': test_data}
rede_neural.fit(canais)

2022-06-02 20:04:39 Starting - Starting the training job...ProfilerReport-1654200279: InProgress
...
2022-06-02 20:05:34 Starting - Preparing the instances for training.........
2022-06-02 20:06:56 Downloading - Downloading input data...
2022-06-02 20:07:31 Training - Downloading the training image...........[34mDocker entrypoint called with argument(s): train[0m
[34m[06/02/2022 20:09:20 INFO 140535754454848] Reading default configuration from /opt/amazon/lib/python3.7/site-packages/image_classification/default-input.json: {'use_pretrained_model': 0, 'num_layers': 152, 'epochs': 30, 'learning_rate': 0.1, 'lr_scheduler_factor': 0.1, 'optimizer': 'sgd', 'momentum': 0, 'weight_decay': 0.0001, 'beta_1': 0.9, 'beta_2': 0.999, 'eps': 1e-08, 'gamma': 0.9, 'mini_batch_size': 32, 'image_shape': '3,224,224', 'precision_dtype': 'float32'}[0m
[34m[06/02/2022 20:09:20 INFO 140535754454848] Merging with provided configuration from /opt/ml/input/config/hyperparameters.json: {'epochs': '10', 'ima

# Deploy e previsões

In [33]:
# Deploy the trained model behind an endpoint; the returned object is used later
# to delete that endpoint.
opcoes_endpoint = {'initial_instance_count': 1, 'instance_type': 'ml.m4.xlarge'}
rede_neural_classificador = rede_neural.deploy(**opcoes_endpoint)

-----------!

In [16]:
# Read the raw bytes of the image to classify; the `with` block closes the file
# handle, which the bare open(...).read() never did.
with open('digit.png', 'rb') as arquivo_imagem:
    imagem = arquivo_imagem.read()

In [17]:
# Send the image bytes to the endpoint deployed by this notebook. The endpoint
# name is taken from the predictor returned by deploy() rather than a literal
# copied from an earlier session, which no longer exists after a fresh run.
runtime = boto3.Session().client(service_name='sagemaker-runtime')
resposta = runtime.invoke_endpoint(EndpointName=rede_neural_classificador.endpoint_name,
                                   ContentType='application/x-image',
                                   Body=bytearray(imagem))

In [18]:
# Show the raw invoke_endpoint response: HTTP metadata plus the body stream,
# which has not been read yet at this point.
resposta

{'ResponseMetadata': {'RequestId': '448baa2b-5324-4abf-a980-6107f68563b1',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '448baa2b-5324-4abf-a980-6107f68563b1',
   'x-amzn-invoked-production-variant': 'AllTraffic',
   'date': 'Tue, 31 May 2022 16:58:01 GMT',
   'content-type': 'application/json',
   'content-length': '231'},
  'RetryAttempts': 0},
 'ContentType': 'application/json',
 'InvokedProductionVariant': 'AllTraffic',
 'Body': <botocore.response.StreamingBody at 0x7f58a3be1e80>}

In [19]:
# Read the response body and decode its JSON payload in one step, then print it.
resultado = json.loads(resposta['Body'].read())
print(resultado)

[3.0299236186692724e-06, 9.434786989004351e-06, 0.00038098148070275784, 0.9989333748817444, 1.7930641433849814e-06, 0.0003537652373779565, 1.2996001714782324e-05, 4.513800740824081e-05, 0.0002288064279127866, 3.070539241889492e-05]


In [20]:
# Number of scores returned by the endpoint.
len(resultado)

10

In [21]:
# Position of the highest score in `resultado`. Despite the variable name, this
# is an index, not a probability value.
pontuacoes = np.asarray(resultado)
probabilidade_maior = pontuacoes.argmax()
probabilidade_maior

3

In [22]:
# Tear down the endpoint behind `rede_neural_classificador` once the predictions are done.
rede_neural_classificador.delete_endpoint()