In [10]:
!pip install sagemaker-experiments

Collecting sagemaker-experiments
  Downloading sagemaker_experiments-0.1.42-py3-none-any.whl (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.6/42.6 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: sagemaker-experiments
Successfully installed sagemaker-experiments-0.1.42
[0m

In [15]:
import sagemaker
import boto3
import pandas as pd
from datetime import datetime 

from sagemaker import image_uris 
from sagemaker.session import Session
from sagemaker.inputs import TrainingInput
from time import gmtime, strftime
import time
from smexperiments import experiment

# AWS context: region of the default boto3 session, a SageMaker SDK session,
# and the execution role returned by the SageMaker SDK.
region = boto3.Session().region_name
session = sagemaker.Session()
role = sagemaker.get_execution_role()

# S3 location components used for the training output path.
# NOTE(review): both are empty placeholders here; the estimator's output_path
# is formatted from them, so they need real values before training.
bucket, prefix = '', ''

# Low-level SageMaker client; the experiment below is created through it.
sm = boto3.Session().client(service_name='sagemaker', region_name=region)

# Experiment name = local timestamp (dd-mm-YYYY-HH-MM-SS) immediately followed
# by a fixed suffix (no separator between the two parts).
timestep = datetime.now().strftime("%d-%m-%Y-%H-%M-%S")
experiment_name = f"{timestep}xgboost-cust-churn"

# Register the experiment; as the last expression, its repr is the cell output.
experiment.Experiment.create(
    sagemaker_boto_client=sm,
    experiment_name=experiment_name,
    description="Iterative model tuning - altering data engineering steps",
)

Experiment(sagemaker_boto_client=<botocore.client.SageMaker object at 0x7f48d6723490>,experiment_name='12-01-2023-19-35-34xgboost-cust-churn',description='Iterative model tuning - altering data engineering steps',tags=None,experiment_arn='arn:aws:sagemaker:us-east-2:791580863750:experiment/12-01-2023-19-35-34xgboost-cust-churn',response_metadata={'RequestId': 'd3a5eb4f-ade7-4598-9cd8-025a71d66beb', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'd3a5eb4f-ade7-4598-9cd8-025a71d66beb', 'content-type': 'application/x-amz-json-1.1', 'content-length': '109', 'date': 'Thu, 12 Jan 2023 19:35:34 GMT'}, 'RetryAttempts': 0})

## Select container
https://docs.aws.amazon.com/sagemaker/latest/dg/ecr-us-east-2.html

In [None]:
# Look up the image URI of the XGBoost framework for the region of the
# default boto3 session.
# NOTE(review): 'latest' does not pin a specific XGBoost release; confirm which
# container version it resolves to and consider an explicit version.
container = image_uris.retrieve(
    framework='xgboost',
    region=boto3.Session().region_name,
    version='latest',
)

## Set location for training and validation data

**IMPORTANT:** For CSV input, SageMaker XGBoost requires the target column to be the first column, and the file must not contain a header row.

In [None]:
# Input channels for training: two CSV objects in S3, one for fitting and one
# for validation.
s3_input_train = TrainingInput(
    content_type='csv',
    s3_data='s3://lawsnic-aiml-east2/kaggle/customerChurn/features/partial/train.csv',
)
s3_input_validation = TrainingInput(
    content_type='csv',
    s3_data='s3://lawsnic-aiml-east2/kaggle/customerChurn/features/partial/validate.csv',
)

## Set up XGBoost algorithm
https://docs.aws.amazon.com/sagemaker/latest/dg/xgboost_hyperparameters.html

https://sagemaker.readthedocs.io/en/stable/frameworks/xgboost/using_xgboost.html#create-an-estimator

In [None]:
from smexperiments.trial import Trial

# Create a new trial under `experiment_name`; each run of this cell adds one.
trial = Trial.create(experiment_name=experiment_name, sagemaker_boto_client=sm)

# Handed to fit() so the training job is recorded under this experiment/trial.
experiment_config = {
    "ExperimentName": experiment_name,
    "TrialName": trial.trial_name,
    "TrialComponentDisplayName": "Training",
}

sess = sagemaker.Session()

# Build the artifact location from non-empty parts only. Formatting empty
# `bucket`/`prefix` values directly produces "s3:////output/xgboost", which is
# not a valid S3 URI, so fall back to the session's default bucket and skip an
# empty prefix. With both values set the result is unchanged:
# s3://<bucket>/<prefix>/output/xgboost
output_bucket = bucket or sess.default_bucket()
output_path = "s3://" + "/".join(
    part.strip("/")
    for part in (output_bucket, prefix, "output/xgboost")
    if part.strip("/")
)

# Single-instance training job on the container selected earlier.
xgb = sagemaker.estimator.Estimator(
    container,
    role,
    instance_count=1,
    instance_type='ml.m4.xlarge',
    output_path=output_path,
    sagemaker_session=sess,
    enable_sagemaker_metrics=True,
)

# Static hyperparameters for the baseline run: binary classification scored
# by AUC over 100 boosting rounds.
# NOTE(review): `silent` is deprecated in newer XGBoost releases in favour of
# `verbosity` - confirm it is accepted by the container version in use.
xgb.set_hyperparameters(
    max_depth=5,
    eta=0.2,
    gamma=4,
    min_child_weight=6,
    subsample=0.8,
    silent=0,
    objective='binary:logistic',
    eval_metric='auc',
    num_round=100,
)
 

In [None]:
# Launch the training job on the train/validation channels and record it
# under the experiment and trial named in `experiment_config`.
xgb.fit(
    inputs={
        'train': s3_input_train,
        'validation': s3_input_validation,
    },
    experiment_config=experiment_config,
)

## Hyperparameter optimization (HPO)
https://docs.aws.amazon.com/sagemaker/latest/dg/xgboost-tuning.html

In [None]:
from sagemaker.tuner import (
    IntegerParameter,
    CategoricalParameter,
    ContinuousParameter,
    HyperparameterTuner,
)

# Metric reported by the training jobs that the tuner optimises.
objective_metric_name = 'validation:auc'

# Search space explored by the tuner; values not listed here keep the static
# hyperparameters already set on `xgb`.
hyperparameter_ranges = dict(
    eta=ContinuousParameter(0, 1),
    min_child_weight=ContinuousParameter(1, 10),
    alpha=ContinuousParameter(0, 2),
    max_depth=IntegerParameter(1, 10),
)

# Up to 20 training jobs in total, at most 3 running at the same time.
tuner = HyperparameterTuner(
    estimator=xgb,
    objective_metric_name=objective_metric_name,
    hyperparameter_ranges=hyperparameter_ranges,
    max_jobs=20,
    max_parallel_jobs=3,
)

In [None]:
# Start the tuning job on the same train/validation channels used for the
# single training run.
tuner.fit(
    inputs={
        'train': s3_input_train,
        'validation': s3_input_validation,
    }
)

In [None]:
# Fetch the description of the most recent tuning job and show its status field.
tuning_job_name = tuner.latest_tuning_job.job_name
tuning_job_description = boto3.client('sagemaker').describe_hyper_parameter_tuning_job(
    HyperParameterTuningJobName=tuning_job_name
)
tuning_job_description['HyperParameterTuningJobStatus']

In [None]:
# Show the full description of the training job the tuner reports as best.
best_job_name = tuner.best_training_job()
boto3.client('sagemaker').describe_training_job(
    TrainingJobName=best_job_name
)