In [None]:
%store -r
print ('data_bucket_name=',data_bucket_name)

In [None]:
import time
import sagemaker
from sagemaker.tensorflow import TensorFlow
# SageMaker session handle; not referenced by the other cells visible in this notebook.
sess = sagemaker.Session()
# Execution role handed to both the TensorFlow estimator and the TensorFlowModel below.
role = sagemaker.get_execution_role()

In [None]:
# Build a training job name from the current local time so each run gets a
# distinct name; the same name is reused for polling and for the endpoint name.
launch_stamp = time.strftime('%Y-%m-%d-%H-%M-%S')
job_name = 'har-tf-' + launch_stamp
print(job_name)

In [None]:
# Hyperparameters handed to the estimator (see `hyperparameters=` in the
# TensorFlow(...) call); edit the values here to tune a run.
hyperparameter_json = {
    'epochs': 5,
    'batch_size': 64,
}

In [None]:
# Estimator API reference:
# https://sagemaker.readthedocs.io/en/stable/api/training/estimators.html
tf_estimator = TensorFlow(
    # --- training code and framework runtime ---
    entry_point='train_tf.py',
    framework_version='1.12',
    py_version='py3',
    script_mode=True,
    hyperparameters=hyperparameter_json,
    # Optional, currently disabled:
    # model_uri='s3://bucket/folder/output/model.tar.gz',

    # --- permissions and compute ---
    role=role,
    instance_count=1,
    instance_type='ml.m5.large',
    volume_size=8,
    use_spot_instances=False,

    # --- monitoring ---
    enable_sagemaker_metrics=True,
)

In [None]:
# fit() API reference:
# https://sagemaker.readthedocs.io/en/stable/api/training/estimators.html#sagemaker.estimator.EstimatorBase.fit

# Start the training job under `job_name` with two input channels
# ('training' and 'test') read from the data bucket. wait=True blocks this
# cell until the call returns.
tf_estimator.fit(
    inputs={
        'training': 's3://' + data_bucket_name + '/train',
        'test': 's3://' + data_bucket_name + '/test',
    },
    job_name=job_name,
    wait=True,
)

In [None]:
#https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_DescribeTrainingJob.html

import boto3
import json
import pprint
client = boto3.client('sagemaker')

# Poll the training job until it reaches a terminal state, then report on it.
#
# TrainingJobStatus takes the values InProgress | Completed | Failed |
# Stopping | Stopped. Only Completed, Failed and Stopped are terminal:
#   * 'Stopping' is transitional, so the job must still be polled.
#   * 'Interrupted' and 'MaxRuntimeExceeded' are SecondaryStatus values and
#     never appear in TrainingJobStatus.
TERMINAL_TRAINING_STATUSES = ('Completed', 'Failed', 'Stopped')

response = client.describe_training_job(
    TrainingJobName=job_name
)

while response['TrainingJobStatus'] not in TERMINAL_TRAINING_STATUSES:
    print(response['TrainingJobStatus'])
    time.sleep(15)
    response = client.describe_training_job(TrainingJobName=job_name)

#pprint.pprint(response)
print('job_name=', job_name)
# TrainingTimeInSeconds is an optional response field, so read it defensively.
print('TrainingTimeInSeconds=', response.get('TrainingTimeInSeconds'))
print('TrainingJobStatus=', response['TrainingJobStatus'])

# Fail loudly here instead of letting the deployment cell try to use the
# artifact of a job that never produced a model.
if response['TrainingJobStatus'] == 'Failed':
    raise RuntimeError(
        'Training job ' + job_name + ' failed: '
        + str(response.get('FailureReason', 'no FailureReason reported'))
    )

print('S3ModelArtifacts=', response['ModelArtifacts']['S3ModelArtifacts'])

In [None]:
# Deploying directly from model artifacts:
# https://sagemaker.readthedocs.io/en/stable/frameworks/tensorflow/deploying_tensorflow_serving.html#deploying-directly-from-model-artifacts

from sagemaker.tensorflow import TensorFlowModel

# The endpoint is named after the training job so the two are easy to match up.
tf_endpoint_name = 'ep-' + job_name

# S3 location of the trained model, taken from the describe_training_job response.
model_artifact = response['ModelArtifacts']['S3ModelArtifacts']

model = TensorFlowModel(
    role=role,
    model_data=model_artifact,
    framework_version='1.12',
)

# wait=True blocks this cell until deploy() returns.
predictor = model.deploy(
    endpoint_name=tf_endpoint_name,
    instance_type='ml.t2.medium',
    initial_instance_count=1,
    # Optional, currently disabled:
    # accelerator_type='ml.eia1.medium'
    wait=True,
)

In [None]:
#https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_DescribeEndpoint.html

import boto3
import json
import pprint
client = boto3.client('sagemaker')

# Poll the endpoint until it is serving traffic.
#
# The previous check, `not in ('InService')`, compared against a plain string
# (parentheses without a comma do not make a tuple), i.e. a substring test.
# It also had no exit for endpoints that end up in a dead state, so a failed
# deployment kept this cell polling forever.
ENDPOINT_READY_STATUS = 'InService'
ENDPOINT_DEAD_STATUSES = ('Failed', 'OutOfService')

# NOTE: this rebinds `response`, which until now held the training-job description.
response = client.describe_endpoint(
    EndpointName=tf_endpoint_name
)

while response['EndpointStatus'] != ENDPOINT_READY_STATUS:
    if response['EndpointStatus'] in ENDPOINT_DEAD_STATUSES:
        raise RuntimeError(
            'Endpoint ' + tf_endpoint_name + ' is ' + response['EndpointStatus'] + ': '
            + str(response.get('FailureReason', 'no FailureReason reported'))
        )
    time.sleep(15)
    response = client.describe_endpoint(
        EndpointName=tf_endpoint_name)
    print(response['EndpointStatus'])

pprint.pprint(response)
print('tf_endpoint_name=', tf_endpoint_name)

In [None]:
# Persist these two names with IPython's %store so another kernel session can
# bring them back with `%store -r`.
%store tf_endpoint_name
%store data_bucket_name