## Create SageMaker Training job
Define the helper functions for SageMaker training and hyperparameter tuning, then launch a training job.

In [13]:
import os
from time import gmtime, strftime
import sagemaker
from sagemaker import get_execution_role
from sagemaker.tensorflow import TensorFlow
from sagemaker.tuner import IntegerParameter, CategoricalParameter, ContinuousParameter, HyperparameterTuner

def sagemaker_estimator(sagemaker_role, code_entry, code_dir, instance_type, instance_count, hyperparameters, metric_definitions):
    """Build the SageMaker TensorFlow estimator used by the training helpers.

    Args:
        sagemaker_role: IAM role handed to the estimator as ``role``.
        code_entry: Training script, handed to the estimator as ``entry_point``.
        code_dir: Directory containing the training code (``source_dir``).
        instance_type: Instance type for the training job.
        instance_count: Number of training instances.
        hyperparameters: Hyperparameters forwarded to the estimator.
        metric_definitions: Metric name/regex definitions forwarded to the estimator.

    Returns:
        The configured, not-yet-fitted ``TensorFlow`` estimator.
    """
    # Caller-supplied settings.
    job_settings = {
        'entry_point': code_entry,
        'source_dir': code_dir,
        'role': sagemaker_role,
        'instance_type': instance_type,
        'instance_count': instance_count,
        'hyperparameters': hyperparameters,
        'metric_definitions': metric_definitions,
    }

    # Fixed settings: framework/runtime versions, model output directory,
    # and managed-spot training with its wait/run/retry limits.
    fixed_settings = {
        'model_dir': '/opt/ml/model',
        'framework_version': '2.2',
        'py_version': 'py37',
        'use_spot_instances': True,
        'max_wait': 7200,
        'max_run': 3600,
        'max_retry_attempts': 5,
        'script_mode': True,
    }

    return TensorFlow(**job_settings, **fixed_settings)


def sagemaker_training(sm_estimator, train_s3, training_job_name):
    """Start a training job on ``sm_estimator`` without waiting for it.

    Args:
        sm_estimator: Estimator whose ``fit`` method launches the job.
        train_s3: Training input passed positionally to ``fit``.
        training_job_name: Name given to the job via ``job_name``.
    """
    # wait=False is passed to fit() so the call is not asked to block.
    fit_options = {'job_name': training_job_name, 'wait': False}
    sm_estimator.fit(train_s3, **fit_options)

def sagemaker_hyperparam_tuning(sm_estimator, train_s3, hyperparameter_ranges, metric_definitions, tuning_job_name, max_jobs, max_parallel_jobs,
                                objective_metric_name='validation:error', objective_type='Minimize'):
    """Start a hyperparameter tuning job for ``sm_estimator`` without waiting for it.

    Args:
        sm_estimator: Estimator that each tuning trial is based on.
        train_s3: Training input passed positionally to ``tuner.fit``.
        hyperparameter_ranges: Search ranges forwarded to the tuner.
        metric_definitions: Metric name/regex definitions forwarded to the tuner.
        tuning_job_name: Name given to the tuning job via ``job_name``.
        max_jobs: Forwarded to the tuner as ``max_jobs``.
        max_parallel_jobs: Forwarded to the tuner as ``max_parallel_jobs``.
        objective_metric_name: Metric the tuner optimizes. Defaults to
            ``'validation:error'``, the value that used to be hard-coded.
        objective_type: Optimization direction. Defaults to ``'Minimize'``,
            the value that used to be hard-coded.

    Returns:
        The ``HyperparameterTuner`` that was started, so the caller can
        inspect or wait on the tuning job. (Previously nothing was returned
        and the tuner object was lost.)
    """
    tuner = HyperparameterTuner(estimator=sm_estimator,
                                objective_metric_name=objective_metric_name,
                                hyperparameter_ranges=hyperparameter_ranges,
                                metric_definitions=metric_definitions,
                                max_jobs=max_jobs,
                                max_parallel_jobs=max_parallel_jobs,
                                objective_type=objective_type)

    # wait=False is passed to fit() so the call is not asked to block.
    tuner.fit(train_s3, job_name=tuning_job_name, wait=False)
    return tuner
    
if __name__ == '__main__':

    session = sagemaker.Session()
    sagemaker_role = get_execution_role()

    # Training data: a single CSV part file in S3.
    # Alternative part file from the same output folder:
    #     train_s3 = "s3://asaf-sagemaker-datasets/final_dataset/output_1642495104/part-00000-db74d4ca-2111-4b23-a734-0f2b4ecd417f-c000.csv"
    train_s3 = "s3://asaf-sagemaker-datasets/final_dataset/output_1642495104/part-00000-3f09f695-c1fb-4d21-b38d-6840b7cdc5a1-c000.csv"

    # Training script and the directory it lives in (relative to the
    # notebook's working directory).
    code_entry = 'local_train.py'
    code_dir = os.getcwd() + '/local_training/'

    # Training infrastructure. Both names are passed to sagemaker_estimator()
    # below, so they must be defined here; leaving them commented out raises
    # NameError on a fresh kernel.
    instance_type = 'ml.c5.xlarge'
    instance_count = 1

    hyperparameters = {'epochs': 250,
                       'batch_size': 5,
                       'es_patience': 40}

    # NOTE(review): the 'train:error' pattern 'loss: ...' is also a substring
    # of 'val_loss: ...', so it can match validation-loss text as well --
    # confirm against the actual log format of the training script.
    metric_definitions = [
        {'Name': 'train:error', 'Regex': 'loss: ([0-9\\.]+)'},
        {'Name': 'validation:error', 'Regex': 'val_loss: ([0-9\\.]+)'},
        {'Name': 'validation:accuracy', 'Regex': 'val_accuracy: ([0-9\\.]+)'}
    ]

    # Get the TensorFlow estimator.
    sm_estimator = sagemaker_estimator(sagemaker_role, code_entry, code_dir, instance_type, instance_count, hyperparameters, metric_definitions)

    # SageMaker training job; the name carries a day-hour-minute-second
    # timestamp (UTC, via gmtime) to keep it unique.
    training_job_name = "tf-nba-training-{}".format(strftime("%d-%H-%M-%S", gmtime()))
    sagemaker_training(sm_estimator, train_s3, training_job_name)

    # SageMaker tuning job (disabled; uncomment to run a tuning job).
#     hyperparameter_ranges = {
#         'epochs': IntegerParameter(50, 200),
#         'batch_size': IntegerParameter(1, 10),
#         'es_patience': IntegerParameter(20, 60)
#     }

#     tuning_job_name = "tf-nba-training-{}".format(strftime("%d-%H-%M-%S", gmtime()))
#     max_jobs = 4
#     max_parallel_jobs = 2
#     sagemaker_hyperparam_tuning(sm_estimator, train_s3, hyperparameter_ranges, metric_definitions, tuning_job_name, max_jobs, max_parallel_jobs)


## Deploy the model from SageMaker

In [24]:
# Print the description of the estimator's most recent training job.
latest_job = sm_estimator.latest_training_job
print(latest_job.describe())

{'TrainingJobName': 'tf-nba-training-09-14-25-58', 'TrainingJobArn': 'arn:aws:sagemaker:us-east-1:535518648590:training-job/tf-nba-training-09-14-25-58', 'TrainingJobStatus': 'InProgress', 'SecondaryStatus': 'Training', 'HyperParameters': {'batch_size': '5', 'epochs': '250', 'es_patience': '40', 'model_dir': '"/opt/ml/model"', 'sagemaker_container_log_level': '20', 'sagemaker_job_name': '"tf-nba-training-09-14-25-58"', 'sagemaker_program': '"local_train.py"', 'sagemaker_region': '"us-east-1"', 'sagemaker_submit_directory': '"s3://sagemaker-us-east-1-535518648590/tf-nba-training-09-14-25-58/source/sourcedir.tar.gz"'}, 'AlgorithmSpecification': {'TrainingImage': '763104351884.dkr.ecr.us-east-1.amazonaws.com/tensorflow-training:2.2-cpu-py37', 'TrainingInputMode': 'File', 'MetricDefinitions': [{'Name': 'train:error', 'Regex': 'loss: ([0-9\\.]+)'}, {'Name': 'validation:error', 'Regex': 'val_loss: ([0-9\\.]+)'}, {'Name': 'validation:accuracy', 'Regex': 'val_accuracy: ([0-9\\.]+)'}], 'EnableS

In [25]:
# The training job is launched with wait=False, so on a straight top-to-bottom
# run it may still be in progress here. Block until it has finished before
# deploying; logs=False waits without streaming the training logs.
sm_estimator.latest_training_job.wait(logs=False)

# Deploy the trained model to a single-instance real-time endpoint.
predictor = sm_estimator.deploy(initial_instance_count=1, instance_type="ml.t2.medium")

update_endpoint is a no-op in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


--------!

## Test

In [37]:
# Request payload: one feature row under the 'instances' key.
# Named `payload` rather than `input` so the built-in input() is not
# shadowed for the rest of the kernel session.
payload = {
  'instances': [
      [105.9,105.2,0.7,0.169,2.24,19.2,0.073,0.21,0.142,8.6,8.6,0.558,0.587,0.197,74.98,0.15,2.1,4,0.532,0,0,0,0.7,1,0.729,1.4,4.1,5.6,1.4,1.1,0.1,0.5,0.3,2,1.4,5]
  ]
}

result = predictor.predict(payload)

# NOTE(review): the recorded run printed {'predictions': [[nan]]}. Presumably
# the model or its input preprocessing needs attention (e.g. unscaled
# features or diverged training) -- verify before relying on this endpoint.
print(result)

{'predictions': [[nan]]}
