In [None]:
import os
import json
import boto3
import sagemaker
import numpy as np

In [None]:
from source.config import Config

# Project settings object built from the YAML file below; later cells read
# the bucket name, dataset paths and column names from it.
config_file = "config/config.yaml"
config = Config(filename=config_file)

In [None]:
sage_session = sagemaker.session.Session()
s3_bucket = config.S3_BUCKET  
s3_output_path = 's3://{}/'.format(s3_bucket)
print("S3 bucket path: {}".format(s3_output_path))

# run in local_mode on this machine, or as a SageMaker TrainingJob
local_mode = False

if local_mode:
    instance_type = 'local'
else:
    instance_type = "ml.c5.xlarge"
    
role = sagemaker.get_execution_role()
print("Using IAM role arn: {}".format(role))
# only run from SageMaker notebook instance
if local_mode:
    !/bin/bash ./setup.sh
cpu_or_gpu = 'gpu' if instance_type.startswith('ml.p') else 'cpu'

In [None]:
# Prefix used as the base name for both the training jobs and the tuning job.
job_name_prefix = "HPO-pdm"

In [None]:
# Capture group for a signed decimal number with an optional exponent
# (e.g. "0.93", "-1.5e-3"). Shared by every metric regex below.
number_pattern = '([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'

# (metric name, label that precedes the value in the regex)
metric_labels = [
    ('Epoch', 'Epoch'),
    ('train_loss', 'Train loss'),
    ('train_acc', 'Train acc'),
    ('train_auc', 'Train auc'),
    ('test_loss', 'Test loss'),
    ('test_acc', 'Test acc'),
    ('test_auc', 'Test auc'),
]

# One {'Name', 'Regex'} entry per metric; each regex is "<label>: <number>",
# with the number in the first capture group.
metric_definitions = [
    {'Name': metric_name, 'Regex': '{}: {}'.format(label, number_pattern)}
    for metric_name, label in metric_labels
]

In [None]:
from sagemaker.pytorch import PyTorch

# Define your data

In [None]:
# Report which local training file the configuration points at.
dataset_message = "Using dataset {}".format(config.train_dataset_fn)
print(dataset_message)

In [None]:
from sagemaker.s3 import S3Uploader

# Both dataset files are uploaded under the same prefix of the configured bucket.
key_prefix = 'fpm-data'
data_s3_uri = 's3://{}/{}'.format(s3_bucket, key_prefix)

# S3Uploader.upload returns a value for each file; it is kept so later cells
# can pass it as the 'train' / 'test' input.
training_data = S3Uploader.upload(config.train_dataset_fn, data_s3_uri)
testing_data = S3Uploader.upload(config.test_dataset_fn, data_s3_uri)

print("Training data: {}".format(training_data))
print("Testing data: {}".format(testing_data))

# Hyperparameter optimization (HPO)

In [None]:
from sagemaker.tuner import IntegerParameter, CategoricalParameter, ContinuousParameter, HyperparameterTuner
# Tuning budget: total number of training jobs, and how many may run at once.
max_jobs, max_parallel_jobs = 20, 5

In [None]:
# Candidate layer layouts. Each choice is a string holding a list literal;
# presumably train.py parses it back into a list -- confirm against train.py.
fc_hidden_unit_choices = [
    "[256, 128]",
    "[256, 128, 128]",
    "[256, 256, 128]",
    "[256, 128, 64]",
]
conv_channel_choices = [
    "[2, 8, 2]",
    "[2, 16, 2]",
    "[2, 16, 16, 2]",
]

# Search space handed to the tuner, keyed by hyperparameter name.
hyperparameter_ranges = {
    'lr': ContinuousParameter(1e-5, 1e-2),
    'batch_size': IntegerParameter(16, 256),
    'dropout': ContinuousParameter(0.0, 0.8),
    'fc_hidden_units': CategoricalParameter(fc_hidden_unit_choices),
    'conv_channels': CategoricalParameter(conv_channel_choices),
}

In [None]:
# Fixed hyperparameters passed to the estimator alongside whatever the tuner varies.
static_hyperparameters = {
    'epoch': 5000,
    'target_column': config.target_column,
    # serialized to a JSON string before being passed along
    'sensor_headers': json.dumps(config.sensor_headers),
    # only the file names are passed, not the local directory part
    'train_input_filename': os.path.basename(config.train_dataset_fn),
    'test_input_filename': os.path.basename(config.test_dataset_fn),
}

# PyTorch estimator running source/train.py on a single instance of
# `instance_type`, writing its output under `s3_output_path`.
estimator = PyTorch(
    entry_point="train.py",
    source_dir='source',
    role=role,
    dependencies=["source/dl_utils"],
    train_instance_type=instance_type,
    train_instance_count=1,
    output_path=s3_output_path,
    framework_version="1.5.0",
    py_version='py3',
    base_job_name=job_name_prefix,
    metric_definitions=metric_definitions,
    hyperparameters=static_hyperparameters,
)

# In local mode, run one training job directly with the uploaded datasets.
if local_mode:
    data_channels = {'train': training_data, 'test': testing_data}
    estimator.fit(data_channels)

In [None]:
# Tune `estimator` over `hyperparameter_ranges`, maximizing the 'test_auc'
# metric, with at most `max_jobs` jobs and `max_parallel_jobs` at a time.
tuner = HyperparameterTuner(estimator,
                            objective_metric_name='test_auc',
                            objective_type='Maximize',
                            hyperparameter_ranges=hyperparameter_ranges,
                            metric_definitions=metric_definitions,
                            max_jobs=max_jobs,
                            max_parallel_jobs=max_parallel_jobs,
                            base_tuning_job_name=job_name_prefix)
# NOTE(review): this launch is not guarded by `local_mode`; confirm the tuner
# is meant to be started when local_mode is True.
tuner.fit({'train': training_data, 'test': testing_data})

# Save the HPO job name
hpo_job_name = tuner.describe()['HyperParameterTuningJobName']

# Persist the job name in the project config with plain Python file I/O
# rather than `!sed -i` / `!echo -e` shell escapes: `echo -e` is not portable
# across `sh` implementations, and sed silently does nothing when the line is
# missing. Whether to replace or append is decided from the file itself, so
# re-running this cell never leaves a stale or duplicated entry.
config_path = "config/config.yaml"
hpo_entry = 'hpo_job_name: "{}"\n'.format(hpo_job_name)

with open(config_path) as config_in:
    config_lines = config_in.readlines()

entry_replaced = False
for line_no, line in enumerate(config_lines):
    # Only a top-level `hpo_job_name:` key is rewritten.
    if line.startswith("hpo_job_name:"):
        config_lines[line_no] = hpo_entry
        entry_replaced = True

if not entry_replaced:
    # Key not present yet: append it, separated from the existing content.
    config_lines.append("\n\n" + hpo_entry)

with open(config_path, "w") as config_out:
    config_out.writelines(config_lines)