In [1]:
import sagemaker

# Open the SageMaker session used below to resolve the bucket and upload data.
sagemaker_session = sagemaker.Session()

# S3 destination for the dataset: the session's default bucket plus a key prefix.
bucket = sagemaker_session.default_bucket()
prefix = "sagemaker/DEMO-pytorch-mnist"

# Execution role that is handed to the PyTorch estimator further down.
role = sagemaker.get_execution_role()

In [2]:
# Getting the MNIST Dataset
from torchvision import datasets, transforms

# Preprocessing attached to the dataset: convert to a tensor, then normalise
# with one-element mean and standard-deviation tuples.
mnist_mean = (0.1307,)
mnist_std = (0.3081,)
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mnist_mean, mnist_std),
])

# download=True fetches the files into the local 'data' directory; keeping the
# dataset as the cell's last expression displays its summary.
datasets.MNIST('data', download=True, transform=mnist_transform)

  0%|          | 16384/9912422 [00:00<01:13, 134955.24it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


9920512it [00:00, 19079028.10it/s]                           


Extracting data/MNIST/raw/train-images-idx3-ubyte.gz


32768it [00:00, 510826.73it/s]
0it [00:00, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz
Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


1654784it [00:01, 1200316.54it/s]                             
8192it [00:00, 259934.78it/s]


Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz
Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz
Processing...
Done!


Dataset MNIST
    Number of datapoints: 60000
    Split: train
    Root Location: data
    Transforms (if any): Compose(
                             ToTensor()
                             Normalize(mean=(0.1307,), std=(0.3081,))
                         )
    Target Transforms (if any): None

In [3]:
# Upload dataset to S3 bucket
inputs = sagemaker_session.upload_data(
    path='data',
    bucket=bucket,
    key_prefix=prefix,
)

# The value returned by upload_data is the S3 location; echo it for reference.
input_spec_message = 'input spec (in this case, just an S3 path): {}'.format(inputs)
print(input_spec_message)

input spec (in this case, just an S3 path): s3://sagemaker-us-east-1-356045791542/sagemaker/DEMO-pytorch-mnist


In [4]:
# Training Script
# Print mnist.py -- the file passed as entry_point to the estimator below --
# through pygmentize so it is shown with syntax colouring.
!pygmentize mnist.py

[34mimport[39;49;00m [04m[36margparse[39;49;00m
[34mimport[39;49;00m [04m[36mjson[39;49;00m
[34mimport[39;49;00m [04m[36mlogging[39;49;00m
[34mimport[39;49;00m [04m[36mos[39;49;00m
[34mimport[39;49;00m [04m[36msagemaker_containers[39;49;00m
[34mimport[39;49;00m [04m[36msys[39;49;00m
[34mimport[39;49;00m [04m[36mtorch[39;49;00m
[34mimport[39;49;00m [04m[36mtorch.distributed[39;49;00m [34mas[39;49;00m [04m[36mdist[39;49;00m
[34mimport[39;49;00m [04m[36mtorch.nn[39;49;00m [34mas[39;49;00m [04m[36mnn[39;49;00m
[34mimport[39;49;00m [04m[36mtorch.nn.functional[39;49;00m [34mas[39;49;00m [04m[36mF[39;49;00m
[34mimport[39;49;00m [04m[36mtorch.optim[39;49;00m [34mas[39;49;00m [04m[36moptim[39;49;00m
[34mimport[39;49;00m [04m[36mtorch.utils.data[39;49;00m
[34mimport[39;49;00m [04m[36mtorch.utils.data.distributed[39;49;00m
[34mfrom[39;49;00m [04m[36mtorchvision[39;49;00m [34mimport[39;49;00m datasets, tran

In [5]:
# Run training in SageMaker
from sagemaker.pytorch import PyTorch

# Hyperparameters forwarded to the training job. mnist.py is not fully visible
# here; it is presumably where these are parsed -- confirm against the script.
training_hyperparameters = {
    'epochs': 6,
    'backend': 'gloo',
}

# Estimator for mnist.py: two ml.c4.xlarge training instances on the 1.1.0
# framework version, running under the execution role obtained earlier.
estimator = PyTorch(
    entry_point='mnist.py',
    role=role,
    framework_version='1.1.0',
    train_instance_count=2,
    train_instance_type='ml.c4.xlarge',
    hyperparameters=training_hyperparameters,
)

In [6]:
# Launch the training job, passing the uploaded S3 location under the
# 'training' key. The job's log output is streamed into this cell.
training_inputs = {'training': inputs}
estimator.fit(training_inputs)

2019-10-07 19:46:43 Starting - Starting the training job...
2019-10-07 19:46:46 Starting - Launching requested ML instances......
2019-10-07 19:47:51 Starting - Preparing the instances for training......
2019-10-07 19:49:04 Downloading - Downloading input data...
2019-10-07 19:49:36 Training - Training image download completed. Training in progress.[31mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[31mbash: no job control in this shell[0m
[31m2019-10-07 19:49:32,360 sagemaker-containers INFO     Imported framework sagemaker_pytorch_container.training[0m
[31m2019-10-07 19:49:32,362 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[31m2019-10-07 19:49:32,374 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[32m2019-10-07 19:49:38,599 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[31mbash: cannot set terminal process group (-1): Inapprop

[31mTest set: Average loss: 0.3247, Accuracy: 9098/10000 (91%)
[0m
[32mTest set: Average loss: 0.3247, Accuracy: 9098/10000 (91%)
[0m
[31mTest set: Average loss: 0.1927, Accuracy: 9423/10000 (94%)
[0m
[32mTest set: Average loss: 0.1927, Accuracy: 9423/10000 (94%)
[0m
[31mTest set: Average loss: 0.1444, Accuracy: 9547/10000 (95%)
[0m
[32mTest set: Average loss: 0.1444, Accuracy: 9547/10000 (95%)
[0m
[31mTest set: Average loss: 0.1273, Accuracy: 9616/10000 (96%)
[0m
[32mTest set: Average loss: 0.1273, Accuracy: 9616/10000 (96%)
[0m
[32mTest set: Average loss: 0.1062, Accuracy: 9661/10000 (97%)
[0m
[31mTest set: Average loss: 0.1062, Accuracy: 9661/10000 (97%)
[0m
[31mTest set: Average loss: 0.0978, Accuracy: 9700/10000 (97%)
[0m
[31mSaving the model.[0m
[31m2019-10-07 19:51:14,150 sagemaker-containers INFO     Reporting training SUCCESS[0m
[32mTest set: Average loss: 0.0978, Accuracy: 9700/10000 (97%)
[0m
[32mSaving the model.[0m
[32m2019-10-07 19:51:14,167

In [7]:
# Deploy the model to an endpoint for hosting.
# A single ml.m4.xlarge instance is requested; the returned predictor is what
# the prediction cell calls later on.
predictor = estimator.deploy(
    initial_instance_count=1,
    instance_type='ml.m4.xlarge',
)

---------------------------------------------------------------------------------------!

In [10]:
from IPython.display import HTML

# Read index.html inside a context manager so the file handle is closed as soon
# as the contents are loaded, instead of being left open until garbage
# collection.
with open("index.html") as html_file:
    index_html = html_file.read()

# Keep HTML(...) as the final expression so the page is rendered as the cell's
# output.
HTML(index_html)

In [13]:
import numpy as np

# `data` is not assigned by any cell visible in this notebook, so on a fresh
# kernel this cell would fail with a bare NameError. Raise the same exception
# type with an actionable message instead.
# NOTE(review): `data` is presumably populated by the index.html page rendered
# earlier -- confirm against that file.
if 'data' not in globals():
    raise NameError(
        "name 'data' is not defined; set it in the kernel before running this "
        "cell (it is presumably populated by the index.html page displayed earlier)"
    )

# Wrap the sample in a list so the array gains a leading axis of length 1,
# cast it to float32, and send it to the deployed endpoint.
image = np.array([data], dtype=np.float32)
response = predictor.predict(image)

# Take the argmax along axis 1 and keep the entry for the single submitted sample.
prediction = response.argmax(axis=1)[0]
print(prediction)

5
