### Imports 

In [29]:
from sagemaker.tensorflow import TensorFlow
from sagemaker import get_execution_role
import sagemaker
import boto3
import os

In [30]:
# Echo the installed SageMaker Python SDK version so the run can be matched to an SDK release.
sagemaker.__version__

'2.16.1'

### Essentials

In [31]:
# --- Values a reader is expected to tune ---------------------------------
TF_FRAMEWORK_VERSION = '2.3.0'
BUCKET = 'cv-examples-892313895307'  # use your account ID or initials as suffix
PREFIX = 'cifar-clf'

# --- AWS handles shared by the rest of the notebook ----------------------
# `role` is handed to the estimators and to the model object further down.
role = get_execution_role()
session = boto3.Session()
sagemaker_session = sagemaker.Session()
s3 = session.resource('s3')

### Test Local Mode

In [8]:
# Local-mode estimator (instance_type='local'), used to try out cifar_train.py
# before launching the managed training job.
estimator = TensorFlow(
    entry_point='cifar_train.py',
    model_dir='/opt/ml/model/1/',  # Note: this will be an S3 path for the real run
    instance_type='local',
    instance_count=1,
    role=role,
    framework_version=TF_FRAMEWORK_VERSION,
    py_version='py37',
    script_mode=True,
)

In [9]:
# Start training with the local-mode estimator; no input channels are passed to fit().
estimator.fit()

Creating tmpkonh4llu_algo-1-rtot4_1 ... 
[1BAttaching to tmpkonh4llu_algo-1-rtot4_12mdone[0m
[36malgo-1-rtot4_1  |[0m 2020-11-19 17:48:12,631 sagemaker-training-toolkit INFO     Imported framework sagemaker_tensorflow_container.training
[36malgo-1-rtot4_1  |[0m 2020-11-19 17:48:12,641 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
[36malgo-1-rtot4_1  |[0m 2020-11-19 17:48:13,849 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
[36malgo-1-rtot4_1  |[0m 2020-11-19 17:48:13,969 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
[36malgo-1-rtot4_1  |[0m 2020-11-19 17:48:13,990 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
[36malgo-1-rtot4_1  |[0m 2020-11-19 17:48:14,001 sagemaker-training-toolkit INFO     Invoking user script
[36malgo-1-rtot4_1  |[0m 
[36malgo-1-rtot4_1  |[0m Training Env:
[36malgo-1-rtot4_1  |[0m 
[36malgo-1-rtot4_

### Real Run - using SageMaker Training

In [32]:
"""
estimator = TensorFlow(entry_point='cifar_train.py',
                       model_dir=f's3://{BUCKET}/{PREFIX}/model-artifacts/1/',
                       instance_type='ml.m5.xlarge',
                       output_path=f's3://{BUCKET}/{PREFIX}/out',
                       instance_count=1,
                       role=role,
                       framework_version=TF_FRAMEWORK_VERSION, 
                       py_version='py37',
                       script_mode=True)
"""

In [37]:
model_name = 'cifar-model-1'

# Keyword arguments for the managed training job, collected in a dict so they can be
# inspected or adjusted before the estimator is constructed.
estimator_parameters = {
    'entry_point': 'cifar_train.py',
    'instance_type': 'ml.m5.large',
    'instance_count': 1,
    # Plain literal: there is nothing to interpolate, so no f-prefix is needed.
    'model_dir': '/opt/ml/model',
    'role': role,
    # The job's model.tar.gz is written beneath this S3 prefix.
    'output_path': f's3://{BUCKET}/{PREFIX}/out',
    # Training job names are derived from this prefix.
    'base_job_name': f'cv-{model_name}',
    'framework_version': TF_FRAMEWORK_VERSION,
    'py_version': 'py37',
    'script_mode': True,
}

estimator = TensorFlow(**estimator_parameters)

# No input channels are passed; presumably cifar_train.py loads its own data — TODO confirm.
estimator.fit()

2020-11-19 19:02:12 Starting - Starting the training job...
2020-11-19 19:02:17 Starting - Launching requested ML instances......
2020-11-19 19:03:33 Starting - Preparing the instances for training......
2020-11-19 19:04:40 Downloading - Downloading input data
2020-11-19 19:04:40 Training - Downloading the training image...
2020-11-19 19:05:04 Training - Training image download completed. Training in progress..[34m2020-11-19 19:05:07,779 sagemaker-training-toolkit INFO     Imported framework sagemaker_tensorflow_container.training[0m
[34m2020-11-19 19:05:07,788 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2020-11-19 19:05:08,140 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2020-11-19 19:05:08,157 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2020-11-19 19:05:08,177 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed

[34m1407/1407 - 24s - loss: 1.5988 - accuracy: 0.4165 - val_loss: 1.3875 - val_accuracy: 0.4922[0m
[34mEpoch 2/10[0m
[34m1407/1407 - 22s - loss: 1.3002 - accuracy: 0.5304 - val_loss: 1.1536 - val_accuracy: 0.5844[0m
[34mEpoch 3/10[0m
[34m1407/1407 - 22s - loss: 1.1899 - accuracy: 0.5713 - val_loss: 1.0692 - val_accuracy: 0.6228[0m
[34mEpoch 4/10[0m
[34m1407/1407 - 22s - loss: 1.1135 - accuracy: 0.6033 - val_loss: 1.0488 - val_accuracy: 0.6220[0m
[34mEpoch 5/10[0m
[34m1407/1407 - 22s - loss: 1.0524 - accuracy: 0.6267 - val_loss: 0.9327 - val_accuracy: 0.6712[0m
[34mEpoch 6/10[0m
[34m1407/1407 - 22s - loss: 1.0067 - accuracy: 0.6420 - val_loss: 0.9251 - val_accuracy: 0.6810[0m
[34mEpoch 7/10[0m
[34m1407/1407 - 22s - loss: 0.9648 - accuracy: 0.6581 - val_loss: 0.8823 - val_accuracy: 0.6938[0m
[34mEpoch 8/10[0m
[34m1407/1407 - 22s - loss: 0.9248 - accuracy: 0.6719 - val_loss: 0.8768 - val_accuracy: 0.6942[0m
[34mEpoch 9/10[0m
[34m1407/1407 - 22s - loss: 0.8

In [40]:
# S3 URI of the model artifact recorded on the estimator after training; echoed for inspection.
model_location = estimator.model_data
model_location

's3://cv-examples-892313895307/cifar-clf/out/cv-cifar-model-1-2020-11-19-19-02-11-892/output/model.tar.gz'

In [41]:
# Destination S3 URI for a copy of the trained artifact, placed under the `mme/` prefix
# (presumably "multi-model endpoint" — TODO confirm).
output_1 = f's3://{BUCKET}/{PREFIX}/mme/model1.tar.gz'

In [42]:
# Copy the training artifact to the `output_1` location with the AWS CLI; IPython fills in
# the {...} placeholders from the Python variables before the shell command runs.
!aws s3 cp {model_location} {output_1} 

Completed 5.2 MiB/5.2 MiB (19.1 MiB/s) with 1 file(s) remainingcopy: s3://cv-examples-892313895307/cifar-clf/out/cv-cifar-model-1-2020-11-19-19-02-11-892/output/model.tar.gz to s3://cv-examples-892313895307/cifar-clf/mme/model1.tar.gz


### Deploy Model

In [43]:
from sagemaker.tensorflow import TensorFlowModel

In [49]:
# Wrap the copied artifact (`output_1`) in a model object that can be deployed.
model = TensorFlowModel(
    model_data=output_1,
    role=role,
    name='cv-cifar-model',
    sagemaker_session=sagemaker_session,
    framework_version=TF_FRAMEWORK_VERSION,
)

In [50]:
# Deploy the model behind a named endpoint and keep the returned predictor
# for the test request made later in the notebook.
predictor = model.deploy(
    initial_instance_count=1,
    instance_type='ml.m5.large',
    endpoint_name='cv-model-1-endpoint',
)

update_endpoint is a no-op in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


-------------!

### Test Deployed Endpoint

In [79]:
from tensorflow.keras.preprocessing import image
import numpy as np

In [80]:
img_path = './DATA/CIFAR_10/truck.png'

# Resize while loading so the batch always comes out as 32x32 RGB; the previous hard
# reshape(1, 32, 32, 3) raised on any image that was not already exactly that size.
# (32x32x3 is presumably the input shape cifar_train.py trains on — TODO confirm.)
img = image.load_img(img_path, target_size=(32, 32))

# Scale pixel values to [0, 1] and prepend the batch axis in a single assignment,
# so `data` is bound once instead of being rebound at each intermediate stage.
data = np.expand_dims(image.img_to_array(img).astype('float32') / 255, axis=0)
data.shape

(1, 32, 32, 3)

In [81]:
# Invoke the deployed endpoint with the prepared batch; the response shown below is a dict
# whose 'predictions' entry holds one list of 10 scores for the single input image.
predictor.predict(data)

{'predictions': [[0.098579973,
   0.00670899451,
   0.105586112,
   0.493491143,
   0.0975488,
   0.014357822,
   0.0138585987,
   0.0262283403,
   0.136159331,
   0.00748087186]]}