In [46]:
import time

# Dataset identifiers; training and evaluation currently point at the same dataset.
train_dataset_name = '7sw_1_5__1'
eval_dataset_name = '7sw_1_5__1'

# Unique run name: the training dataset name with underscores turned into hyphens,
# followed by a UTC timestamp (presumably so the name is accepted by SageMaker
# resource-name rules and repeated runs do not collide -- confirm).
dataset_hyphened_name = '{}-{}'.format(
    train_dataset_name.replace('_', '-'),
    time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime()),
)
layers = 5

### Client & SM sessions

In [47]:
import sagemaker, boto3

# A single boto3 session backs both the SageMaker SDK session and the
# low-level SageMaker client used by the experiments API below.
sess = boto3.Session()
sagemaker_session = sagemaker.Session(boto_session=sess)
sm = sess.client(service_name='sagemaker')

# IAM role the training job will run under.
role = sagemaker.get_execution_role()

### Create experiment

In [48]:
from smexperiments.experiment import Experiment
from smexperiments.trial import Trial
from smexperiments.trial_component import TrialComponent

# Create the experiment; the timestamped run name serves as both its name
# and its description.
training_experiment = Experiment.create(
    experiment_name=dataset_hyphened_name,
    description=dataset_hyphened_name,
    sagemaker_boto_client=sm,
)

### Create trial

In [49]:
# Create a trial inside the experiment, reusing the timestamped run name.
training_trial = Trial.create(
    trial_name=dataset_hyphened_name,
    experiment_name=training_experiment.experiment_name,
    sagemaker_boto_client=sm,
)

# Display name for the trial component; same run name again.
training_trial_comp_name = dataset_hyphened_name

# Experiment/trial association handed to the estimator's fit() call.
experiment_config = dict(
    ExperimentName=training_experiment.experiment_name,
    TrialName=training_trial.trial_name,
    TrialComponentDisplayName=training_trial_comp_name,
)

### Run training job & visualize results

In [50]:
from sagemaker.tensorflow import TensorFlow

# S3 prefix of the training dataset; it is also forwarded to the training
# script as the 'dataframe_dir' hyperparameter.
dataframe_dir = 's3://obstacles-classification/{}'.format(train_dataset_name)

# Hyperparameters passed to the entry-point script. Key spellings (hyphen vs.
# underscore) are kept exactly as they are -- presumably the script parses
# them under these names; verify against the script before renaming.
hyperparams = {
    'batch-size': 16,
    'optimizer': 'adam',
    'dataframe_dir': dataframe_dir,
}

# Job artifacts go under the session's default bucket, grouped by dataset name.
bucket_name = sagemaker_session.default_bucket()
output_path = 's3://{}/obstacles_classification/jobs/{}'.format(bucket_name, train_dataset_name)

# (metric name, key as it appears in the training log) for every
# floating-point metric scraped from the job's log output.
_metric_log_keys = [
    ('auc', 'auc'),
    ('recall', 'recall'),
    ('specifity', 'specifity'),
    ('accuracy', 'accuracy'),
    ('loss', 'loss'),
    ('validation auc', 'val_auc'),
    ('validation recall', 'val_recall'),
    ('validation specifity', 'val_specifity'),
    ('validation accuracy', 'val_categorical_accuracy'),
    ('validation loss', 'val_loss'),
    ('test auc', 'test_auc'),
    ('test recall', 'test_recall'),
    ('test specifity', 'test_specifity'),
    ('test accuracy', 'test_accuracy'),
    ('test loss', 'test_loss'),
]

# Each float metric is captured by "<log key>: <number>".
# NOTE(review): the patterns are unanchored, so e.g. 'loss: ' also occurs
# inside 'val_loss: ' and 'test_loss: ', and 'accuracy: ' inside
# 'val_categorical_accuracy: ' -- confirm the training metrics are not
# picking up validation/test values.
metric_definitions = [
    {'Name': metric_name, 'Regex': f'{log_key}: ([0-9\\.]+)'}
    for metric_name, log_key in _metric_log_keys
]

# The epoch counter is an integer with its own log format.
metric_definitions.append({'Name': 'epoch', 'Regex': 'Epoch ([0-9]+)'})

# TensorFlow estimator that runs the 7_channels_weights_lrrt.py entry point on
# a single ml.c5.xlarge instance, scraping the metrics in metric_definitions
# from the job log.
#
# Uses the SageMaker SDK v2 keyword names instance_count / instance_type; the
# v1 spellings train_instance_count / train_instance_type are deprecated and
# emit a "See: .../v2.html" notice on every run. script_mode is no longer
# passed: the v2 migration guide lists it as removed for TensorFlow
# estimators (script mode is the only supported mode).
tf_estimator = TensorFlow(
    entry_point          = '7_channels_weights_lrrt.py',
    # Model artifacts and the uploaded source code share the same S3 prefix.
    output_path          = f'{output_path}/',
    code_location        = output_path,
    role                 = role,
    instance_count       = 1,
    instance_type        = 'ml.c5.xlarge',
    framework_version    = '2.3',
    py_version           = 'py37',
    metric_definitions   = metric_definitions,
    debugger_hook_config = False,
    sagemaker_session    = sagemaker_session,
    hyperparameters      = hyperparams,
)

# Training job name carries its own UTC timestamp, taken when this cell runs.
job_name = 'obstacles-classification-{}'.format(
    time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())
)

# The 'training' channel points at the train/ prefix of the dataset.
training_dataset = 's3://obstacles-classification/{}/train'.format(train_dataset_name)

# Launch the job and attach it to the experiment/trial configured earlier.
tf_estimator.fit(
    inputs={'training': training_dataset},
    job_name=job_name,
    experiment_config=experiment_config,
)

See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
INFO:sagemaker.image_uris:Defaulting to the only supported framework/algorithm version: latest.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker:Creating training-job with name: obstacles-classification-2022-01-11-13-12-43


2022-01-11 13:12:43 Starting - Starting the training job...
2022-01-11 13:13:08 Starting - Launching requested ML instancesProfilerReport-1641906763: InProgress
......
2022-01-11 13:14:10 Starting - Preparing the instances for training......
2022-01-11 13:15:10 Downloading - Downloading input data......
2022-01-11 13:16:11 Training - Downloading the training image..[34m2022-01-11 13:16:17.978064: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.[0m
[34m2022-01-11 13:16:17.986066: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:105] SageMaker Profiler is not enabled. The timeline writer thread will not be started, future recorded events will be dropped.[0m
[34m2022-01-11 13:16:18.281651: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.[0m
[34m2022-01-11 13:16:21,627 sagemaker-training-toolkit INFO     Imported framework sagemaker_tensorflow_container.training[0m
[34