## Load required libraries

In [1]:
import boto3
import sagemaker
from sagemaker.xgboost.estimator import XGBoost
from sagemaker.session import Session

sess = sagemaker.Session()
role = sagemaker.get_execution_role()
region = boto3.Session().region_name

## Create an XGBoost estimator

In [2]:
# Construct a SageMaker estimator that calls the xgboost-container

from sagemaker.debugger import Rule, rule_configs
from sagemaker import image_uris

bucket = "ads508-team4-xgboost"
prefix = "models"
s3_output_location='s3://{}/{}/{}'.format(bucket, prefix, 'xgboost')

# Set up container

container = sagemaker.image_uris.retrieve("xgboost", region, "1.2-1")
print(container)

xgb_model = sagemaker.estimator.Estimator(
    image_uri = container,
    role = role,
    instance_count = 1,
    instance_type = 'ml.m5.large',
    volume_size = 5,
    output_path = s3_output_location,
    sagemaker_session = sagemaker.Session(),
    rules = [Rule.sagemaker(rule_configs.create_xgboost_report())]
)

683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.2-1


## Set hyperparameters for xgboost

In [3]:
xgb_model.set_hyperparameters(
    max_depth = 5,
    eta = 0.2,
    gamma = 4,
    min_child_weight = 6,
    subsample = 0.7,
    objective = "multi:softmax",
    num_round = 20,
    num_class = 8
)

## Set path for input files

In [6]:
from sagemaker.session import TrainingInput

content_type = "csv"

train_input = TrainingInput('s3://ads508-team4-split/train/df_train.csv',content_type = content_type)
validation_input = TrainingInput('s3://ads508-team4-split/validation/df_validation.csv', content_type = content_type)
test_input = TrainingInput('s3://ads508-team4-split/test/df_test.csv', content_type = content_type)

## Start Training

In [7]:
xgb_model.fit({"train": train_input, "train": train_input}, wait=True)

2022-03-30 03:32:35 Starting - Starting the training job...
2022-03-30 03:32:38 Starting - Launching requested ML instancesCreateXgboostReport: InProgress
ProfilerReport-1648611155: InProgress
.........
2022-03-30 03:34:35 Starting - Preparing the instances for training......
2022-03-30 03:35:35 Downloading - Downloading input data...
2022-03-30 03:36:07 Training - Downloading the training image......
2022-03-30 03:37:05 Training - Training image download completed. Training in progress.[34m[2022-03-30 03:36:47.656 ip-10-0-91-44.ec2.internal:1 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34mINFO:sagemaker-containers:Imported framework sagemaker_xgboost_container.training[0m
[34mINFO:sagemaker-containers:Failed to parse hyperparameter objective value multi:softmax to Json.[0m
[34mReturning the value itself[0m
[34mINFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)[0m
[34mINFO:sagemaker_xgboost_container.training:Running XGBoost Sagemaker in a

# Show the name of the training job

In [8]:
training_job_name = xgb_model.latest_training_job.name
print("Training Job Name:  {}".format(training_job_name))

Training Job Name:  sagemaker-xgboost-2022-03-30-03-32-35-528


# Show training job metrics

In [9]:
xgb_model.training_job_analytics.dataframe()



Unnamed: 0,timestamp,metric_name,value
0,0.0,train:merror,0.0
