## Training a Custom TensorFlow Model in SageMaker

Let's see how to train a custom TensorFlow model in the cloud using SageMaker's training service.

Remember to write a train.py script and pass it as the entry_point when you create the TensorFlow estimator.


### Import dependencies

In [None]:
import sagemaker
# import os
from sagemaker.tensorflow import TensorFlow
from sagemaker.tensorflow import TensorFlowModel
# import boto3
# import io 
import numpy as np
# import sagemaker.amazon.common as smac
# from s3fs.core import S3FileSystem

from sagemaker import get_execution_role
# One SageMaker session for the notebook; the region is read from the
# boto session it wraps.
sagemaker_session = sagemaker.Session()
region = sagemaker_session.boto_session.region_name

# Execution role, later passed to both the estimator and the model.
role = get_execution_role()


## Setup SageMaker

## Docker image and job_name

In [None]:
from datetime import datetime

# Timestamp prefix so that every run produces a distinct training job name.
date = datetime.now().strftime("%y%m%d-%H%M%S")

# TensorFlow 2.3.1 training containers for the current region (CPU and GPU builds).
image_uri_cpu='763104351884.dkr.ecr.{}.amazonaws.com/tensorflow-training:2.3.1-cpu-py37-ubuntu18.04'.format(region)
image_uri_gpu='763104351884.dkr.ecr.{}.amazonaws.com/tensorflow-training:2.3.1-gpu-py37-cu102-ubuntu18.04'.format(region)

epochs = 200

instance_type = 'ml.m5.large'

# Derive the device from the instance type instead of hard-coding it, so the
# container image and the job-name label always match the hardware actually
# requested. GPU instances are the 'ml.p*' / 'ml.g*' families (plus the SDK's
# 'local_gpu' mode); everything else is treated as CPU.
if instance_type == 'local_gpu' or instance_type.startswith(('ml.p', 'ml.g')):
    device = 'gpu'
else:
    device = 'cpu'

image_uri = image_uri_gpu if device == 'gpu' else image_uri_cpu

# Job name layout: <timestamp>-hand-gesture-<instance>-<device>-<epochs>e
# (dots in the instance type become dashes and the 'ml-' prefix is dropped).
job_name = '{}-hand-gesture-{}-{}-{}e'.format(
    date,
    instance_type.replace('.','-').replace('ml-', ''),
    device,
    epochs)

## Build a TensorFlow Estimator

In [3]:
# Hyperparameters handed to the estimator; only the epoch count is set here.
training_hyperparameters = {'epochs': epochs}

# S3 prefix given to the estimator as model_dir.
model_output_prefix = 's3://sagemaker-us-east-1-410677554255/tensorflow2/'

# Estimator that runs train.py on a single instance using the container
# image and instance type chosen in the configuration cell.
# NOTE(review): the SageMaker SDK v2 migration guide describes script mode as
# the only supported mode - confirm whether script_mode=False has any effect.
estimator = TensorFlow(
    entry_point='train.py',
    role=role,
    instance_count=1,
    instance_type=instance_type,
    image_uri=image_uri,
    model_dir=model_output_prefix,
    hyperparameters=training_hyperparameters,
    script_mode=False,
)

In [4]:
# Inspect the hyperparameters held by the estimator (the notebook echoes the
# returned dict; its values are shown as strings).
estimator.hyperparameters()

{'epochs': '200',
 'model_dir': '"s3://sagemaker-us-east-1-410677554255/tensorflow2/"'}

# Start Training Job
(Only train if you don't have a trained TensorFlow model yet.)

# With GPU


In [7]:
# Input channels for the training job: a single 'train' channel read from S3.
training_inputs = {
    'train': 's3://sagemaker-us-east-1-410677554255/hand_gesture_tensorflow/data/',
}

# wait=False returns right after the job is submitted; wait=True would block
# the notebook and stream the job logs in real time.
estimator.fit(training_inputs, wait=False, job_name=job_name)
             

In [7]:
# Attach to the training job by name. attach() is a classmethod that returns a
# new estimator; the result is only echoed by the notebook here, it is not
# bound to a variable.
TensorFlow.attach(training_job_name=job_name)


2022-11-23 17:20:28 Starting - Starting the training job.
2022-11-23 17:20:42 Starting - Preparing the instances for training............
2022-11-23 17:21:49 Downloading - Downloading input data....
2022-11-23 17:22:14 Training - Downloading the training image.............................
2022-11-23 17:24:45 Training - Training image download completed. Training in progress................
2022-11-23 17:26:05 Uploading - Uploading generated training model.
2022-11-23 17:26:16 Completed - Training job completed


<sagemaker.tensorflow.estimator.TensorFlow at 0x7f0f447c0520>

In [12]:
# Echo the estimator object so the notebook prints its repr.
estimator

<sagemaker.tensorflow.estimator.TensorFlow at 0x7f0f8674d9a0>

# Load the trained model

In [4]:
# Model artifact written to S3 by an earlier training job.
model_artifact_uri = 's3://sagemaker-us-east-1-410677554255/221123-172023-hand-gesture-m5-large-gpu-200e/output/model.tar.gz'

# TensorFlow inference container used to serve the artifact.
inference_image_uri = '763104351884.dkr.ecr.us-east-1.amazonaws.com/tensorflow-inference:2.2.0-gpu-py37-cu102-ubuntu18.04'

# Wrap the stored artifact in a deployable TensorFlowModel.
model = TensorFlowModel(
    model_data=model_artifact_uri,
    role=role,
    image_uri=inference_image_uri,
)



# Deploying the model

In [5]:
# Show the class of the model object built in the previous cell.
type(model)

sagemaker.tensorflow.model.TensorFlowModel

In [6]:
# Deploy the model behind an endpoint backed by a single ml.c4.xlarge
# instance; the object returned by deploy() is what the inference cells call
# predict() on.
tensorflow_model = model.deploy(
    instance_type='ml.c4.xlarge',
    initial_instance_count=1,
)

update_endpoint is a no-op in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


-------!

# Inference

In [8]:
# One input sample for the endpoint: 42 floats, written two per line.
# NOTE(review): presumably (x, y) pairs for 21 hand landmarks - confirm
# against the preprocessing used in train.py.
sample = [
    0.0, 0.0,
    -0.1527777777777778, -0.12037037037037036,
    -0.2175925925925926, -0.3101851851851852,
    -0.125, -0.44907407407407407,
    0.004629629629629629, -0.5,
    -0.14351851851851852, -0.4861111111111111,
    -0.20833333333333334, -0.6851851851851852,
    -0.24074074074074073, -0.8055555555555556,
    -0.2638888888888889, -0.9166666666666666,
    -0.023148148148148147, -0.49537037037037035,
    0.004629629629629629, -0.7453703703703703,
    0.009259259259259259, -0.8796296296296297,
    0.027777777777777776, -1.0,
    0.07407407407407407, -0.44907407407407407,
    0.0787037037037037, -0.6435185185185185,
    0.009259259259259259, -0.5046296296296297,
    -0.018518518518518517, -0.38425925925925924,
    0.16203703703703703, -0.36574074074074076,
    0.1388888888888889, -0.5046296296296297,
    0.06481481481481481, -0.42592592592592593,
    0.023148148148148147, -0.3333333333333333,
]

# Send the sample to the deployed endpoint and keep the response.
result = tensorflow_model.predict(sample)

In [9]:

# Position of the largest value in the endpoint's 'predictions' output
# (presumably the predicted gesture class - confirm against the label order
# used in training).
predictions = result['predictions']
np.argmax(predictions)

4

In [11]:
# Echo the object returned by deploy() so the notebook prints its repr.
tensorflow_model

<sagemaker.tensorflow.model.TensorFlowPredictor at 0x7f53cf02f850>