# Model training

In [None]:
import time, os, sys
import sagemaker, boto3
import numpy as np
import pandas as pd
from sagemaker.tensorflow import TensorFlow


# A single boto3 session backs both the SageMaker SDK session and the
# low-level SageMaker client used further down in this notebook.
sess = boto3.Session()
sagemaker_session = sagemaker.Session(boto_session=sess)
sm = sess.client('sagemaker')
role = sagemaker.get_execution_role()

# ----- Location of the TFRecords in S3 -----
# Option 1: upload the data to S3 through the SageMaker session, e.g.
#   datasets_bucketpath = sagemaker_session.upload_data(path='cifar10', key_prefix='datasets/cifar10-dataset')
# Option 2: upload the data to S3 directly from your local computer, without
# going through SageMaker, and provide the bucket path here.
datasets_bucketpath = 's3://ai4boundaries'



from smexperiments.experiment import Experiment
from smexperiments.trial import Trial
from smexperiments.trial_component import TrialComponent

# Create the experiment and one trial under it, both through the SageMaker
# client `sm`.
# NOTE(review): both names are fixed strings, so re-running this cell
# presumably fails once they already exist -- confirm.
training_experiment = Experiment.create(
    experiment_name="sagemaker-training-experiments",
    description="Experiment to track ResUnet training trials",
    sagemaker_boto_client=sm,
)

single_gpu_trial = Trial.create(
    trial_name='sagemaker-single-gpu-training',
    experiment_name=training_experiment.experiment_name,
    sagemaker_boto_client=sm,
)

# Passed to the estimator's fit() call below so the training job is recorded
# under this experiment and trial.
trial_comp_name = 'single-gpu-training-job'
experiment_config = dict(
    ExperimentName=training_experiment.experiment_name,
    TrialName=single_gpu_trial.trial_name,
    TrialComponentDisplayName=trial_comp_name,
)



# ----- Train the model with fixed hyperparameters -----
hyperparams = {
    'epochs': 5,
    'learning-rate': 0.01,
    'batch-size': 8,
}

# Job artifacts and the packaged source code both go under
# s3://<default bucket>/jobs.
bucket_name = sagemaker_session.default_bucket()
output_path = f's3://{bucket_name}/jobs'

# TODO: check how to revise this metric definition.
# The regex captures the number that follows "val_extent_accuracy: ";
# presumably the training script logs lines in that form -- confirm.
metric_definitions = [
    dict(Name='val_extent_accuracy', Regex='val_extent_accuracy: ([0-9\\.]+)'),
]

tf_estimator = TensorFlow(
    entry_point='resunet-training-sagemaker.py',
    role=role,
    sagemaker_session=sagemaker_session,
    framework_version='1.15.2',
    py_version='py3',
    train_instance_count=1,
    train_instance_type='ml.g4dn.xlarge',
    input_mode='Pipe',
    # script_mode=True,
    output_path=f'{output_path}/',
    code_location=output_path,
    metric_definitions=metric_definitions,
    hyperparameters=hyperparams,
)

# A UTC timestamp suffix gives each run a distinct job name.
job_name = 'tensorflow-single-gpu-' + time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())

# All three channels point at the same S3 location.
tf_estimator.fit(
    dict.fromkeys(('training', 'validation', 'eval'), datasets_bucketpath),
    job_name=job_name,
    experiment_config=experiment_config,
)


# Model tuning

In [None]:
import time, os, sys
import sagemaker, boto3
import numpy as np
import pandas as pd
from sagemaker.tensorflow import TensorFlow


# A single boto3 session backs both the SageMaker SDK session and the
# low-level SageMaker client.
sess = boto3.Session()
sagemaker_session = sagemaker.Session(boto_session=sess)
sm = sess.client('sagemaker')
role = sagemaker.get_execution_role()

# ----- Location of the TFRecords in S3 -----
# Option 1: upload the data to S3 through the SageMaker session, e.g.
#   datasets_bucketpath = sagemaker_session.upload_data(path='cifar10', key_prefix='datasets/cifar10-dataset')
# Option 2: upload the data to S3 directly from your local computer, without
# going through SageMaker, and provide the bucket path here.
# NOTE(review): the value below looks like a placeholder (the training cell
# uses 's3://ai4boundaries') -- replace it with the real dataset location.
datasets_bucketpath = 's3://.......'



# # ----- Automatic model-tuning -----
from sagemaker.tuner import IntegerParameter, CategoricalParameter, ContinuousParameter, HyperparameterTuner


# Objective the tuner optimizes. HyperparameterTuner expects plain strings
# for both the metric name and the objective type; the one-element lists used
# previously are not valid values for the tuning-job request.
objective_metric_name = 'val_extent_accuracy'
objective_type = 'Maximize'

# TODO: check how to revise this metric definition.
# The regex captures the number that follows "val_extent_accuracy: ";
# presumably the training script logs lines in that form -- confirm.
# The 'Name' here must match objective_metric_name above.
metric_definitions = [{'Name': 'val_extent_accuracy', 'Regex': 'val_extent_accuracy: ([0-9\\.]+)'}]

# Define the output location in this cell so that it does not depend on the
# training cell having been run first (same value as used there).
bucket_name = sagemaker_session.default_bucket()
output_path = f's3://{bucket_name}/jobs'

# NOTE(review): the single-GPU training cell sets input_mode='Pipe' while this
# estimator leaves the input mode at its default -- confirm the entry-point
# script handles the mode used here.
tf_estimator = TensorFlow(entry_point          = 'resunet-training-sagemaker.py',
                          output_path          = f'{output_path}/',
                          code_location        = output_path,
                          role                 = role,
                          train_instance_count = 1,
                          train_instance_type  = 'ml.g4dn.xlarge',
                          framework_version    = '1.15',
                          py_version           = 'py3',
                          script_mode          = True,
                          metric_definitions   = metric_definitions,
                          sagemaker_session    = sagemaker_session)


# Search space explored by the tuner. Categorical values are given as strings.
hyperparameter_ranges = {
    'epochs'        : IntegerParameter(250, 300),
    'learning-rate' : ContinuousParameter(0.001, 0.1, scaling_type='Logarithmic'),
    'batch-size'    : CategoricalParameter(['8','16','32']),
}

# Up to 16 training jobs in total, at most 8 running at the same time.
tuner = HyperparameterTuner(estimator             = tf_estimator,
                            objective_metric_name = objective_metric_name,
                            hyperparameter_ranges = hyperparameter_ranges,
                            metric_definitions    = metric_definitions,
                            max_jobs              = 16,
                            max_parallel_jobs     = 8,
                            objective_type        = objective_type)

# A UTC timestamp suffix gives each tuning run a distinct job name.
job_name=f'tf-hpo-{time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())}'

# All three channels point at the same S3 location.
tuner.fit({'training'  : datasets_bucketpath,
           'validation': datasets_bucketpath,
           'eval'      : datasets_bucketpath},
            job_name = job_name)