In [1]:
%%time

import io
import os
import boto3
import sagemaker
import time

# Execution role handed to the SageMaker estimator/model below, and the region
# of the active boto3 session.
role = sagemaker.get_execution_role()
region = boto3.Session().region_name

# Code and model artifacts are stored under `prefix` in the session's default
# bucket; assign a different bucket name here if you prefer your own.
_sm_session = sagemaker.Session()
bucket = _sm_session.default_bucket()
prefix = "sagemaker/DEMO-xgboost-inference-script-mode"

CPU times: user 799 ms, sys: 136 ms, total: 934 ms
Wall time: 1.01 s


In [2]:
%%time
s3 = boto3.client("s3")

# Load the dataset: copy the Abalone file from the `sagemaker-sample-files`
# bucket to a local file. The source key ends in ".libsvm"; the local copy is
# saved without an extension.
FILE_DATA = "abalone"
s3.download_file(
    "sagemaker-sample-files",
    # Plain literal: the key has no placeholders, so no f-string is needed.
    "datasets/tabular/uci_abalone/abalone.libsvm",
    FILE_DATA,
)

# Stage the local file under <prefix>/train in the working bucket. The call's
# return value (the uploaded object's S3 URI) is left as the last expression of
# the cell so the notebook displays it.
sagemaker.Session().upload_data(FILE_DATA, bucket=bucket, key_prefix=prefix + "/train")

CPU times: user 193 ms, sys: 21.9 ms, total: 215 ms
Wall time: 665 ms


's3://sagemaker-us-east-1-930992672261/sagemaker/DEMO-xgboost-inference-script-mode/train/abalone'

In [20]:
from sagemaker.inputs import TrainingInput
from sagemaker.xgboost.estimator import XGBoost


# A UTC timestamp is appended to the job name to tell runs apart.
run_stamp = time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())
job_name = "DEMO-xgboost-inference-script-mode-" + run_stamp
print("Training job", job_name)

# Hyperparameters handed to the estimator; every value is given as a string.
hyperparameters = dict(
    max_depth="5",
    eta="0.2",
    gamma="4",
    min_child_weight="6",
    subsample="0.7",
    objective="reg:squarederror",
    num_round="50",
    verbosity="2",
)

instance_type = "ml.c5.xlarge"

# Script-mode estimator using abalone.py as the entry point; its output path is
# s3://<bucket>/<prefix>/<job_name>/output.
xgb_script_mode_estimator = XGBoost(
    entry_point="abalone.py",
    framework_version="1.5-1",
    hyperparameters=hyperparameters,
    role=role,
    instance_type=instance_type,
    instance_count=1,
    output_path=f"s3://{bucket}/{prefix}/{job_name}/output",
)

# Training channel pointing at everything under <prefix>/train/.
# NOTE(review): the file staged there was downloaded from "abalone.libsvm", yet
# the channel is declared as "text/csv" - confirm this matches what abalone.py
# expects to read.
content_type = "text/csv"
train_input = TrainingInput(f"s3://{bucket}/{prefix}/train/", content_type=content_type)

Training job DEMO-xgboost-inference-script-mode-2022-12-30-21-15-22


In [21]:
# Show the S3 prefix used for the training channel.
f"s3://{bucket}/{prefix}/train/"

's3://sagemaker-us-east-1-930992672261/sagemaker/DEMO-xgboost-inference-script-mode/train/'

In [22]:
# Launch the training job under the name generated earlier.
# NOTE(review): the same TrainingInput feeds both the "train" and "validation"
# channels - confirm that validating on the training data is intended.
data_channels = {"train": train_input, "validation": train_input}
xgb_script_mode_estimator.fit(data_channels, job_name=job_name)

2022-12-30 21:15:23 Starting - Starting the training job...
2022-12-30 21:15:48 Starting - Preparing the instances for trainingProfilerReport-1672434923: InProgress
.........
2022-12-30 21:17:08 Downloading - Downloading input data..[34m[2022-12-30 21:17:30.687 ip-10-2-124-236.ec2.internal:8 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34m[2022-12-30:21:17:30:INFO] Imported framework sagemaker_xgboost_container.training[0m
[34m[2022-12-30:21:17:30:INFO] No GPUs detected (normal if no gpus installed)[0m
[34m[2022-12-30:21:17:30:INFO] Invoking user training script.[0m
[34m[2022-12-30:21:17:30:INFO] Module abalone does not provide a setup.py. [0m
[34mGenerating setup.py[0m
[34m[2022-12-30:21:17:30:INFO] Generating setup.cfg[0m
[34m[2022-12-30:21:17:30:INFO] Generating MANIFEST.in[0m
[34m[2022-12-30:21:17:30:INFO] Installing module with the following command:[0m
[34m/miniconda3/bin/python3 -m pip install . [0m
[34mProcessing /opt/ml/code
  Preparing metada


2022-12-30 21:17:51 Training - Training image download completed. Training in progress.
2022-12-30 21:17:51 Uploading - Uploading generated training model
2022-12-30 21:17:51 Completed - Training job completed
Training seconds: 42
Billable seconds: 42


In [24]:
from sagemaker.xgboost.model import XGBoostModel

# S3 location of the model artifact reported by the fitted estimator.
model_data = xgb_script_mode_estimator.model_data
print(model_data)

# Wrap the trained artifact in a model that uses inference.py as its entry
# point, with the same framework version string as the estimator ("1.5-1").
xgb_inference_model = XGBoostModel(
    entry_point="inference.py",
    framework_version="1.5-1",
    model_data=model_data,
    role=role,
)

s3://sagemaker-us-east-1-930992672261/sagemaker/DEMO-xgboost-inference-script-mode/DEMO-xgboost-inference-script-mode-2022-12-30-21-15-22/output/DEMO-xgboost-inference-script-mode-2022-12-30-21-15-22/output/model.tar.gz


's3://sagemaker-us-east-1-930992672261/sagemaker/DEMO-xgboost-inference-script-mode/train/'

In [25]:
# Create the batch-transform handle for the inference model.
#
# `accept` is pinned to the literal "text/csv" rather than read from the shared
# `content_type` variable: that variable is reassigned to "text/libsvm" by the
# transform cell that follows, so re-running this cell afterwards would
# silently change the requested output format. "text/csv" is the value the
# variable holds when the notebook is run top to bottom.
#
# The variable name's spelling ("scrip") is kept because the transform cell
# refers to it by that name.
xgb_scrip_transformer = xgb_inference_model.transformer(
    instance_count=1,
    instance_type="ml.c4.xlarge",
    accept="text/csv",
)

In [27]:
# Requests are sent with the libsvm MIME type and the input is split per line.
content_type = "text/libsvm"

# NOTE(review): "lib_abalone" is not uploaded by any cell visible in this
# notebook (only "abalone" is) - confirm the object exists under <prefix>/train.
# NOTE(review): the recorded run of this cell failed inside predict_fn with an
# XGBoost feature-count mismatch (8 vs. 9) - check how inference.py parses the
# input against how the model was trained.
xgb_scrip_transformer.transform(
    data=f"s3://{bucket}/{prefix}/train/lib_abalone",
    content_type=content_type,
    split_type="Line",
)


..............................[34m[2022-12-30:21:27:03:INFO] No GPUs detected (normal if no gpus installed)[0m
[34m[2022-12-30:21:27:03:INFO] No GPUs detected (normal if no gpus installed)[0m
[34m[2022-12-30:21:27:03:INFO] nginx config: [0m
[34mworker_processes auto;[0m
[34mdaemon off;[0m
[34mpid /tmp/nginx.pid;[0m
[34merror_log  /dev/stderr;[0m
[34mworker_rlimit_nofile 4096;[0m
[34mevents {
  worker_connections 2048;[0m
[34m}[0m
[34mhttp {
  include /etc/nginx/mime.types;
  default_type application/octet-stream;
  access_log /dev/stdout combined;
  upstream gunicorn {
    server unix:/tmp/gunicorn.sock;
  }
  server {
    listen 8080 deferred;
    client_max_body_size 0;
    keepalive_timeout 3;
    location ~ ^/(ping|invocations|execution-parameters) {
      proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
      proxy_set_header Host $http_host;
      proxy_redirect off;
      proxy_read_timeout 60s;
      proxy_pass http://gunicorn;
    }
    locatio

[32m2022-12-30T21:27:14.998:[sagemaker logs]: MaxConcurrentTransforms=1, MaxPayloadInMB=6, BatchStrategy=MULTI_RECORD[0m
[34m[2022-12-30:21:27:16:ERROR] Exception on /invocations [POST][0m
[35m[2022-12-30:21:27:16:ERROR] Exception on /invocations [POST][0m
[34mTraceback (most recent call last):
  File "/miniconda3/lib/python3.8/site-packages/sagemaker_containers/_functions.py", line 93, in wrapper
    return fn(*args, **kwargs)
  File "/opt/ml/code/inference.py", line 43, in predict_fn
    prediction = model.predict(input_data)
  File "/miniconda3/lib/python3.8/site-packages/xgboost/core.py", line 1913, in predict
    _check_call(
  File "/miniconda3/lib/python3.8/site-packages/xgboost/core.py", line 218, in _check_call
    raise XGBoostError(py_str(_LIB.XGBGetLastError()))[0m
[34mxgboost.core.XGBoostError: [21:27:16] ../src/learner.cc:1257: Check failed: learner_model_param_.num_feature >= p_fmat->Info().num_col_ (8 vs. 9) : Number of columns does not match number of features




UnexpectedStatusException: Error for Transform job sagemaker-xgboost-2022-12-30-21-22-14-717: Failed. Reason: AlgorithmError: See job logs for more information

In [12]:
# Inspection only: displays the repr of the TrainingInput object; defines nothing.
train_input

<sagemaker.inputs.TrainingInput at 0x7fe0946dc0d0>