In [1]:
import sagemaker

# Region of the boto session that backs a default SageMaker session.
default_session = sagemaker.Session()
region = default_session.boto_region_name
print("AWS Region: {}".format(region))

# ARN of the IAM execution role that SageMaker resolves for this notebook.
role = sagemaker.get_execution_role()
print("RoleArn: {}".format(role))

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
AWS Region: ap-northeast-2
RoleArn: arn:aws:iam::058264479685:role/service-role/AmazonSageMaker-ExecutionRole-20241030T125455


In [2]:
from sagemaker.debugger import Rule, ProfilerRule, rule_configs
from sagemaker.session import TrainingInput

# A single Session is shared by the bucket lookup and the estimator, instead of
# constructing a fresh Session (and its underlying AWS clients) for each use.
sagemaker_session = sagemaker.Session()

bucket = sagemaker_session.default_bucket()
prefix = "demo-sagemaker-xgboost"

# S3 prefix handed to the estimator as its output location.
s3_output_location = 's3://{}/{}/{}'.format(bucket, prefix, 'xgboost_model')

# Container image URI of the XGBoost algorithm (version 1.2-1) for this region.
container = sagemaker.image_uris.retrieve("xgboost", region, "1.2-1")
print(container)

# Estimator settings:
#   1) instance_type     - ml.m4.xlarge instance has 4 CPUs, 16 GB of memory.
#   2) volume_size       - the size, in GB, of the EBS storage volume to attach
#                          to the training instance.
#   3) sagemaker_session - the session object that manages interactions with
#                          SageMaker API operations and other AWS services that
#                          the training job uses.
#   4) rules             - Debugger rules run alongside training: one builds the
#                          XGBoost training report, the other the profiler report.
xgb_model = sagemaker.estimator.Estimator(
    image_uri=container,
    role=role,
    instance_count=1,
    instance_type='ml.m4.xlarge',
    volume_size=5,
    output_path=s3_output_location,
    sagemaker_session=sagemaker_session,
    rules=[
        Rule.sagemaker(rule_configs.create_xgboost_report()),
        ProfilerRule.sagemaker(rule_configs.ProfilerReport()),
    ],
)

366743142698.dkr.ecr.ap-northeast-2.amazonaws.com/sagemaker-xgboost:1.2-1


In [3]:
# Hyperparameters for the XGBoost training job, collected in one mapping so
# they can be inspected or tweaked before being applied to the estimator.
xgb_hyperparameters = {
    "max_depth": 6,
    "eta": 0.2,
    "gamma": 4,
    "min_child_weight": 6,
    "subsample": 0.7,
    "objective": "binary:logistic",
    "num_round": 800,
}

xgb_model.set_hyperparameters(**xgb_hyperparameters)

In [4]:
# TrainingInput class to configure a data input flow for training
from sagemaker.session import TrainingInput

# S3 locations of the CSV training and validation files under the demo prefix.
train_s3_uri = "s3://{}/{}/{}".format(bucket, prefix, "data/train.csv")
validation_s3_uri = "s3://{}/{}/{}".format(bucket, prefix, "data/validation.csv")

# Wrap each location as a CSV input for the training job.
train_input = TrainingInput(train_s3_uri, content_type="csv")
validation_input = TrainingInput(validation_s3_uri, content_type="csv")

In [5]:
# Map channel names to their inputs, then launch the training job and block
# until it completes (wait=True).
data_channels = {"train": train_input, "validation": validation_input}
xgb_model.fit(inputs=data_channels, wait=True)

INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2024-10-30-04-45-14-566


2024-10-30 04:45:19 Starting - Starting the training job...
2024-10-30 04:45:43 Starting - Preparing the instances for trainingCreateXgboostReport: InProgress
ProfilerReport: InProgress
...
2024-10-30 04:46:03 Downloading - Downloading input data...
2024-10-30 04:46:43 Downloading - Downloading the training image...
2024-10-30 04:47:18 Training - Training image download completed. Training in progress...[34m[2024-10-30 04:47:29.127 ip-10-0-155-117.ap-northeast-2.compute.internal:7 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34mINFO:sagemaker-containers:Imported framework sagemaker_xgboost_container.training[0m
[34mINFO:sagemaker-containers:Failed to parse hyperparameter objective value binary:logistic to Json.[0m
[34mReturning the value itself[0m
[34mINFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)[0m
[34mINFO:sagemaker_xgboost_container.training:Running XGBoost Sagemaker in algorithm mode[0m
[34mINFO:root:Determined delimiter of CSV i

In [6]:
rule_output_path = xgb_model.output_path + "/" + xgb_model.latest_training_job.job_name + "/rule-output"
! aws s3 ls {rule_output_path} --recursive

2024-10-30 04:48:35        317 demo-sagemaker-xgboost/xgboost_model/sagemaker-xgboost-2024-10-30-04-45-14-566/rule-output/CreateXgboostReport/xgboost-reports/ConfusionMatrix.json
2024-10-30 04:48:35        734 demo-sagemaker-xgboost/xgboost_model/sagemaker-xgboost-2024-10-30-04-45-14-566/rule-output/CreateXgboostReport/xgboost-reports/EvaluationMetrics.json
2024-10-30 04:48:35       2689 demo-sagemaker-xgboost/xgboost_model/sagemaker-xgboost-2024-10-30-04-45-14-566/rule-output/CreateXgboostReport/xgboost-reports/FeatureImportance.json
2024-10-30 04:48:35       7663 demo-sagemaker-xgboost/xgboost_model/sagemaker-xgboost-2024-10-30-04-45-14-566/rule-output/CreateXgboostReport/xgboost-reports/IterativeClassAccuracy.json
2024-10-30 04:48:35        180 demo-sagemaker-xgboost/xgboost_model/sagemaker-xgboost-2024-10-30-04-45-14-566/rule-output/CreateXgboostReport/xgboost-reports/LabelsDistribution.json
2024-10-30 04:48:35      28922 demo-sagemaker-xgboost/xgboost_model/sagemaker-xgboost-2024-