# Training the V1 model

## Setup

In [1]:
# sagemaker
import boto3
import sagemaker
from sagemaker import get_execution_role

# import a PyTorch wrapper
from sagemaker.pytorch import PyTorch

# importing PyTorchModel
from sagemaker.pytorch import PyTorchModel

In [2]:
# Open the SageMaker session used by every job launched from this notebook
sagemaker_session = sagemaker.Session()

# Execution role handed to the estimator and the model further down
role = get_execution_role()

# Default S3 bucket of the session; dataset and model artifacts are
# addressed relative to it
bucket = sagemaker_session.default_bucket()

## Data

In [3]:
# Preview the first few object keys in the default bucket to confirm the
# training images are present before a training job is launched.
# The collection's `.limit()` caps how many objects are yielded, which
# replaces the hand-maintained counter / break of the earlier version.
max_keys = 10
for obj in boto3.resource('s3').Bucket(bucket).objects.limit(max_keys):
    print(obj.key)

images/train/wario/wario_land_3_img_0.jpg
images/train/wario/wario_land_3_img_10.jpg
images/train/wario/wario_land_3_img_100.jpg
images/train/wario/wario_land_3_img_1000.jpg
images/train/wario/wario_land_3_img_1001.jpg
images/train/wario/wario_land_3_img_1002.jpg
images/train/wario/wario_land_3_img_1003.jpg
images/train/wario/wario_land_3_img_1004.jpg
images/train/wario/wario_land_3_img_1005.jpg
images/train/wario/wario_land_3_img_1006.jpg


## Training

In [4]:
# S3 location of the image dataset. It is derived from the session's default
# bucket (the key listing above shows the `images/` prefix lives there)
# instead of a hardcoded bucket name, so no account- or region-specific
# identifier is baked into the notebook.
input_data = 's3://{}/images'.format(bucket)

In [5]:
# Model artifacts produced by training are written under this prefix of the
# default bucket
prefix = 'model_v1'
output_path = 's3://%s/%s' % (bucket, prefix)

# Configure the PyTorch training job: `train.py` from the local `model_v1`
# directory is the entry point, executed on a single ml.c4.xlarge instance.
# The hyperparameters reach the script as command-line flags (`--epochs 5`
# in the recorded job log).
# NOTE(review): the recorded run of this job failed with
# "ModuleNotFoundError: No module named 'skimage'" (imported by dl_utils.py),
# so the training environment presumably needs scikit-image supplied through
# the source directory - TODO confirm how before re-running.
estimator = PyTorch(
    entry_point='train.py',
    source_dir='model_v1',
    role=role,
    sagemaker_session=sagemaker_session,
    framework_version='1.0',
    train_instance_type='ml.c4.xlarge',
    train_instance_count=1,
    hyperparameters={'epochs': 5},
    output_path=output_path,
)

In [6]:
%%time 
# train the estimator on S3 training data
estimator.fit({'train': input_data})

2020-04-07 23:31:21 Starting - Starting the training job...
2020-04-07 23:31:23 Starting - Launching requested ML instances......
2020-04-07 23:32:48 Starting - Preparing the instances for training......
2020-04-07 23:33:45 Downloading - Downloading input data......
2020-04-07 23:34:44 Training - Downloading the training image..[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2020-04-07 23:35:00,428 sagemaker-containers INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2020-04-07 23:35:00,431 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2020-04-07 23:35:00,444 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2020-04-07 23:35:00,445 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[34m2020-04-07 23:35:00,794 sagemaker-containers INFO     Module train does not

UnexpectedStatusException: Error for Training job sagemaker-pytorch-2020-04-07-23-31-21-522: Failed. Reason: AlgorithmError: ExecuteUserScriptError:
Command "/usr/bin/python -m train --epochs 5"
Traceback (most recent call last):
  File "/usr/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/opt/ml/code/train.py", line 15, in <module>
    from dl_utils import *
  File "/opt/ml/code/dl_utils.py", line 8, in <module>
    from skimage.io import imread, imshow
ModuleNotFoundError: No module named 'skimage'

## Deploying the model for inference

In [None]:



# Wrap the trained artifacts in a deployable model whose inference entry
# point is `predict.py`.
# This reads `estimator.model_data`, so it needs a successful
# `estimator.fit(...)` in the current kernel.
# NOTE(review): source_dir is 'source_solution' here while training used
# 'model_v1' - confirm this directory exists and contains predict.py.
model = PyTorchModel(
    entry_point='predict.py',
    source_dir='source_solution',
    model_data=estimator.model_data,
    framework_version='1.0',
    role=role,
)

In [None]:
%%time
# deploy and create a predictor
predictor = model.deploy(initial_instance_count=1, instance_type='ml.t2.medium')

In [None]:
# Scratch note (not executable code) - S3 URI pattern: s3://bucketname/image_folder