In [2]:
import boto3
import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import sagemaker
from sagemaker import get_execution_role
from sagemaker.inputs import TrainingInput
from sagemaker.serializers import CSVSerializer
from sagemaker import image_uris
from sagemaker.session import Session
import time
import itertools

In [3]:
# S3 location used by the cells below. `bucket` is a placeholder that must be
# replaced with a real bucket name; `prefix` is the key prefix under which the
# train/validation data is read and the training output is written.
bucket = "<Bucket Name>"
prefix = "sagemaker/xgboost_credit_risk"

In [None]:
# Build the S3 URIs of the two CSV data channels from the shared bucket/prefix.
train_s3_uri = 's3://{}/{}/train'.format(bucket, prefix)
validation_s3_uri = 's3://{}/{}/validation/'.format(bucket, prefix)

# Wrap each URI as a CSV training input.
s3_input_train = TrainingInput(s3_data=train_s3_uri, content_type='csv')
s3_input_validation = TrainingInput(s3_data=validation_s3_uri, content_type='csv')

# Channel name -> input mapping handed to `Estimator.fit` later in the notebook.
inputs = dict(train=s3_input_train, validation=s3_input_validation)

In [None]:
# A single boto3 session backs both the SageMaker SDK session and the low-level
# SageMaker client (`sm`) that is handed to the Experiments SDK calls below.
sess = boto3.Session()
sagemaker_session = sagemaker.Session(boto_session=sess)
sm = sess.client(service_name="sagemaker")

# Execution role that is passed to the Estimator when training jobs are created.
role = get_execution_role()

# Set Up Experiments

In [3]:
# SageMaker Experiments SDK classes. `Experiment` and `Trial` are used by the
# cells below; `TrialComponent` and `Tracker` are imported but not referenced
# in the visible part of this notebook.
# NOTE(review): the recorded output of this cell is
#   ModuleNotFoundError: No module named 'smexperiments'
# i.e. the package was missing from the kernel that last ran it. The module is
# presumably shipped by the `sagemaker-experiments` distribution -- install it
# into the kernel environment before running this cell (TODO confirm
# package name/version for this environment).
from smexperiments.experiment import Experiment
from smexperiments.trial import Trial
from smexperiments.trial_component import TrialComponent
from smexperiments.tracker import Tracker

ModuleNotFoundError: No module named 'smexperiments'

In [None]:
# Register a new experiment through the low-level SageMaker client. The
# epoch-seconds suffix makes the experiment name differ between runs
# (at one-second resolution).
xgboost_credit_risk_experiment = Experiment.create(
    sagemaker_boto_client=sm,
    experiment_name=f"xgboost-credit-risk-experiment-{int(time.time())}",
    description="Training on credit risk dataset",
)

print(xgboost_credit_risk_experiment)

In [None]:
# Candidate values for each hyperparameter to sweep over.
hyperparam_options = {"eta": [0.1, 0.5], "num_round": [10, 20]}

# Split the grid into parallel tuples of names and candidate-value lists ...
hypnames = tuple(hyperparam_options.keys())
hypvalues = tuple(hyperparam_options.values())

# ... and expand it into one {name: value} dict per point of the Cartesian product.
trial_hyperparameter_set = [
    dict(zip(hypnames, combination))
    for combination in itertools.product(*hypvalues)
]
trial_hyperparameter_set

In [None]:
# Resolve the XGBoost container image URI for the current region.
# `image_uris.retrieve` (already imported at the top of the notebook) is the
# SageMaker SDK v2 replacement for the deprecated
# `sagemaker.amazon.amazon_estimator.get_image_uri` helper. The framework and
# version requested are the same as before ('xgboost', 'latest').
uri = image_uris.retrieve(
    framework='xgboost',
    region=boto3.Session().region_name,
    version='latest',
)

In [None]:
# 1-based counter used only to label each run in the "trial-desc" tag.
run_number = 1

# Where every training job of the sweep writes its output.
model_output_path = 's3://{}/{}/output'.format(bucket, prefix)

# One Trial + one training job per hyperparameter combination.
for trial_hyp in trial_hyperparameter_set:
    # The trial's grid point is used as the complete hyperparameter set
    # (no additional static hyperparameters are merged in here).
    hyperparams = trial_hyp

    # Job/trial names embed the hyperparameter values (dots replaced by
    # dashes) and the current epoch seconds.
    time_append = int(time.time())
    hyp_append = "-".join(str(elm).replace(".", "-") for elm in trial_hyp.values())
    training_job_name = f"xgboost-credit-risk-training-{hyp_append}-{time_append}"
    trial_name = f"trial-xgboost-credit-risk-training-{hyp_append}-{time_append}"
    trial_desc = f"my-xgboost-credit-risk-run-{run_number}"

    # Register the Trial under the experiment created earlier.
    xgboost_credit_risk_trial = Trial.create(
        trial_name=trial_name,
        experiment_name=xgboost_credit_risk_experiment.experiment_name,
        sagemaker_boto_client=sm,
        tags=[{"Key": "trial-desc", "Value": trial_desc}],
    )

    # Experiment config that associates the training job with this Trial.
    experiment_config = dict(
        ExperimentName=xgboost_credit_risk_experiment.experiment_name,
        TrialName=xgboost_credit_risk_trial.trial_name,
        TrialComponentDisplayName=training_job_name,
    )

    xgboost_credit_risk_estimator = sagemaker.estimator.Estimator(
        image_uri=uri,
        role=role,
        instance_count=1,
        instance_type='ml.m4.xlarge',
        output_path=model_output_path,
        sagemaker_session=sagemaker_session,
        hyperparameters=hyperparams,
        enable_sagemaker_metrics=True,
        tags=[{"Key": "trial-desc", "Value": trial_desc}],
    )

    # Launch the training job on the train/validation channels.
    xgboost_credit_risk_estimator.fit(
        inputs=inputs,
        job_name=training_job_name,
        experiment_config=experiment_config,
    )

    # Short pause before dispatching the next training job.
    time.sleep(2)
    run_number += 1

In [None]:
from sagemaker.analytics import ExperimentAnalytics

# Collect the trial components recorded under this notebook's experiment
# into a DataFrame and display it as the cell output.
experiment_name = xgboost_credit_risk_experiment.experiment_name
trial_component_analytics = ExperimentAnalytics(
    experiment_name=experiment_name,
    sagemaker_session=sagemaker_session,
)

trial_comp_ds_jobs = trial_component_analytics.dataframe()
trial_comp_ds_jobs