In [1]:
import sagemaker
from sagemaker.xgboost.estimator import XGBoost
from sagemaker.session import Session
from sagemaker.inputs import TrainingInput

In [2]:
# XGBoost training hyperparameters: binary classification with a logistic
# objective, boosted for 50 rounds (values are passed to the job as strings)
hyperparameters = dict(
    objective="binary:logistic",
    num_round="50",
)

In [3]:
# S3 destination for the trained model artifacts, located under the
# SageMaker session's default bucket
bucket = sagemaker.Session().default_bucket()
prefix = 'models'
output_path = f's3://{bucket}/{prefix}/xgb/output'

In [4]:
# look up the image URI of the built-in XGBoost algorithm container (version 1.2-2).
# The region comes from the active SageMaker session instead of being hard-coded,
# so the URI matches the region the session is configured for.
region = sagemaker.Session().boto_region_name
xgboost_container = sagemaker.image_uris.retrieve(
    framework='xgboost',
    region=region,
    version='1.2-2',
)

In [5]:
# generic SageMaker estimator that runs the XGBoost container image on a single
# ml.m5.large instance and writes its results to output_path
estimator = sagemaker.estimator.Estimator(
    # what to run
    image_uri=xgboost_container,
    hyperparameters=hyperparameters,
    # where to run it and with which permissions
    instance_type='ml.m5.large',
    instance_count=1,
    role=sagemaker.get_execution_role(),
    # where the result goes
    output_path=output_path,
)

In [6]:
# S3 input channels for the training and validation datasets, both declared as CSV
content_type = 'text/csv'
train_input = TrainingInput(
    f's3://{bucket}/data/train/train/',
    content_type=content_type,
)
validation_input = TrainingInput(
    f's3://{bucket}/data/train/validation/',
    content_type=content_type,
)

In [7]:
# launch the training job with the two named data channels
estimator.fit(
    inputs={
        'train': train_input,
        'validation': validation_input,
    }
)

2022-03-25 22:17:00 Starting - Starting the training job...
2022-03-25 22:17:24 Starting - Preparing the instances for trainingProfilerReport-1648246619: InProgress
......
2022-03-25 22:18:29 Downloading - Downloading input data......
2022-03-25 22:19:24 Training - Downloading the training image.........
2022-03-25 22:20:56 Uploading - Uploading generated training model[34m[2022-03-25 22:20:45.175 ip-10-2-255-82.ec2.internal:1 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34m[2022-03-25:22:20:45:INFO] Imported framework sagemaker_xgboost_container.training[0m
[34m[2022-03-25:22:20:45:INFO] Failed to parse hyperparameter objective value binary:logistic to Json.[0m
[34mReturning the value itself[0m
[34m[2022-03-25:22:20:45:INFO] No GPUs detected (normal if no gpus installed)[0m
[34m[2022-03-25:22:20:45:INFO] Running XGBoost Sagemaker in algorithm mode[0m
[34m[2022-03-25:22:20:45:INFO] Determined delimiter of CSV input is ','[0m
[34m[2022-03-25:22:20:45:INFO] De