In [1]:
import sagemaker
import boto3
import os
import numpy as np
import seaborn as sns

# Use seaborn's "talk" plotting context for figures drawn in this notebook.
sns.set_context(context="talk")

# Name of the S3 bucket that the data/model URIs in this notebook are built from.
BUCKET = "project05-capstone-vexenta"

In [2]:
# S3 URIs of the train / test CSVs fed to the tuning job.
input_train = f"s3://{BUCKET}/data/model-input/train/df_train_rfe.csv"
input_test = f"s3://{BUCKET}/data/model-input/test/df_test_rfe.csv"

# NOTE(review): despite the "_dir" name this URI ends in an object key
# (model.tar.gz), not a prefix -- confirm that is intended.
model_output_dir = f"s3://{BUCKET}/model/hp-tuning/model.tar.gz"

# Export the locations under SageMaker-style SM_* variable names.
# NOTE(review): this only changes the environment of the notebook process;
# presumably meant for running the entry-point script locally -- confirm,
# since the tuning job receives its channels via tuner.fit(inputs=...).
os.environ.update(
    {
        "SM_MODEL_DIR": model_output_dir,
        "SM_CHANNEL_TRAIN": input_train,
        "SM_CHANNEL_TEST": input_test,
    }
)

In [3]:
from sagemaker.tuner import (
    CategoricalParameter,
    ContinuousParameter,
    HyperparameterTuner,
    IntegerParameter,
)

## execution role handed to the estimator
role = sagemaker.get_execution_role()

## objective: the metric is extracted from the training logs via the regex
## below, and the tuner is asked to maximize it
objective_type = "Maximize"
objective_metric_name = "cv f1-score"
metric_definitions = [
    {
        "Name": objective_metric_name,
        "Regex": "CV F1-score: ([0-9\\.]+)",
    },
]

## search space: every hyperparameter is tuned over a fixed list of candidates
## NOTE(review): max_depth includes None; confirm the training script converts
## whatever value it receives for that choice back into a Python None.
hyperparameter_ranges = dict(
    n_estimators=CategoricalParameter([100, 150, 200, 300]),
    max_depth=CategoricalParameter([None, 15, 20, 25, 30]),
    min_samples_split=CategoricalParameter([2, 10, 20, 30]),
)

In [4]:
## scikit-learn estimator that runs the training script for each tuning trial
from sagemaker.sklearn.estimator import SKLearn

estimator = SKLearn(
    # what to run
    entry_point="../../src/modelling/train-rf.py",
    framework_version="0.20.0",
    py_version='py3',
    # who runs it and on which hardware
    role=role,
    instance_type="ml.m5.2xlarge",
    instance_count=1,
    # NOTE(review): no output_path is given, so artifacts presumably go to
    # the SDK's default location rather than BUCKET -- confirm this is wanted.
)

## hyperparameter tuner wrapped around the estimator
tuner = HyperparameterTuner(
    estimator=estimator,
    base_tuning_job_name='rf-hp-tuning',
    # what to optimise
    objective_metric_name=objective_metric_name,
    objective_type=objective_type,
    metric_definitions=metric_definitions,
    # where to search
    hyperparameter_ranges=hyperparameter_ranges,
    # budget: 6 training jobs overall, at most 2 at a time
    max_jobs=6,
    max_parallel_jobs=2,
)

In [5]:
## Launch the tuning job on the "train" and "test" channels; wait=True keeps
## the cell running until the job is done
tuner.fit(
    inputs=dict(train=input_train, test=input_test),
    wait=True,
)

...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................!


In [6]:
## show the name of the best training job found by the tuner
tuner.best_training_job()

'rf-hp-tuning-220103-2302-006-357deda1'

In [7]:
## get the estimator for the best training job and display the
## hyperparameters recorded for it (the tuned values plus sagemaker_* entries)
best_estimator = tuner.best_estimator()
best_estimator.hyperparameters()


2022-01-03 23:41:41 Starting - Preparing the instances for training
2022-01-03 23:41:41 Downloading - Downloading input data
2022-01-03 23:41:41 Training - Training image download completed. Training in progress.
2022-01-03 23:41:41 Uploading - Uploading generated training model
2022-01-03 23:41:41 Completed - Training job completed


{'_tuning_objective_metric': '"cv f1-score"',
 'max_depth': '"20"',
 'min_samples_split': '"20"',
 'n_estimators': '"300"',
 'sagemaker_container_log_level': '20',
 'sagemaker_estimator_class_name': '"SKLearn"',
 'sagemaker_estimator_module': '"sagemaker.sklearn.estimator"',
 'sagemaker_job_name': '"sagemaker-scikit-learn-2022-01-03-23-02-40-217"',
 'sagemaker_program': '"train-rf.py"',
 'sagemaker_region': '"us-east-1"',
 'sagemaker_submit_directory': '"s3://sagemaker-us-east-1-567220378588/sagemaker-scikit-learn-2022-01-03-23-02-40-217/source/sourcedir.tar.gz"'}

In [9]:
## bind the best job's hyperparameters to a name and display them;
## note the values come back as quoted strings (e.g. '"20"'), not numbers
hyperparameters = best_estimator.hyperparameters()
hyperparameters

{'_tuning_objective_metric': '"cv f1-score"',
 'max_depth': '"20"',
 'min_samples_split': '"20"',
 'n_estimators': '"300"',
 'sagemaker_container_log_level': '20',
 'sagemaker_estimator_class_name': '"SKLearn"',
 'sagemaker_estimator_module': '"sagemaker.sklearn.estimator"',
 'sagemaker_job_name': '"sagemaker-scikit-learn-2022-01-03-23-02-40-217"',
 'sagemaker_program': '"train-rf.py"',
 'sagemaker_region': '"us-east-1"',
 'sagemaker_submit_directory': '"s3://sagemaker-us-east-1-567220378588/sagemaker-scikit-learn-2022-01-03-23-02-40-217/source/sourcedir.tar.gz"'}