In [1]:
# Import
import numpy as np
import pandas as pd
import os
import boto3
from matplotlib import pyplot as plt
%matplotlib inline

In [2]:
import sagemaker

session = sagemaker.Session()
role = sagemaker.get_execution_role()
region = session.boto_region_name
bucket = 'old-man-mcbucket'
os.environ["DEFAULT_S3_BUCKET"] = bucket

In [14]:
from sagemaker import image_uris, model_uris, script_uris
from sagemaker.utils import name_from_base, unique_name_from_base

train_model_id, train_model_version, train_scope = (
    "autogluon-classification-ensemble",
    "*",
    "training",
)
training_instance_type = "ml.m5.2xlarge"

# Retrieve the docker image
train_image_uri = image_uris.retrieve(
    region=region,
    framework=None,
    model_id=train_model_id,
    model_version=train_model_version,
    image_scope=train_scope,
    instance_type=training_instance_type,
)

# Retrieve the training script
train_source_uri = script_uris.retrieve(
    model_id=train_model_id, model_version=train_model_version, script_scope=train_scope
)
# Retrieve the pre-trained model tarball to further fine-tune. In tabular case, however, the pre-trained model tarball is dummy and fine-tune means training from scratch.
train_model_uri = model_uris.retrieve(
    model_id=train_model_id, model_version=train_model_version, model_scope=train_scope
)


INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.


In [15]:
s3_prefix = f"train_data"
training_dataset_s3_path = f"s3://{bucket}/{s3_prefix}/"
output_path = f"s3://{bucket}/{s3_prefix}/output/"

In [16]:
# Custom AWS wrappers built here
# cited 2023-02-20
# https://github.com/aws/amazon-sagemaker-examples/tree/main/advanced_functionality/autogluon-tabular-containers

from ag_model import AutoGluonSagemakerEstimator

ag = AutoGluonSagemakerEstimator(
    role=role,
    entry_point="train_script.py",
    region=region,
    instance_count=1,
    instance_type=training_instance_type,
    framework_version="0.6",
    py_version="py38",
    base_job_name="autogluon-tabular-train",
    disable_profiler=True,
    debugger_hook_config=False,
)


In [17]:
job_name = unique_name_from_base("base-ag-model")
ag.fit({"training": training_dataset_s3_path}, logs=True, job_name=job_name)

INFO:sagemaker:Creating training-job with name: base-ag-model-1676925550-b5c7


2023-02-20 20:39:11 Starting - Starting the training job...
2023-02-20 20:39:26 Starting - Preparing the instances for training......
2023-02-20 20:40:35 Downloading - Downloading input data
2023-02-20 20:40:35 Training - Downloading the training image......
2023-02-20 20:41:26 Training - Training image download completed. Training in progress....
2023-02-20 20:42:08 Uploading - Uploading generated training model
2023-02-20 20:42:08 Failed - Training job failed
[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2023-02-20 20:41:55,394 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2023-02-20 20:41:55,396 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2023-02-20 20:41:55,398 sagemaker-training-toolkit INFO     No Neurons detected (normal if no neurons installed)[0m
[34m2023-02-20 20:41:55,408 sagemaker_py

UnexpectedStatusException: Error for Training job base-ag-model-1676925550-b5c7: Failed. Reason: AlgorithmError: ExecuteUserScriptError:
ExitCode 1
ErrorMessage "FileNotFoundError: [Errno 2] No such file or directory: '/opt/ml/input/data/training/train.csv'"
Command "/opt/conda/bin/python3.8 train_script.py", exit code: 1

In [11]:
from sagemaker.estimator import Estimator

training_job_name = name_from_base(f"base-model-{train_model_id}-training")

# Create SageMaker Estimator instance
tabular_estimator = Estimator(
    role=role,
    image_uri=train_image_uri,
    source_dir=train_source_uri,
    model_uri=train_model_uri,
    entry_point="train_script.py",
    instance_count=1,
    instance_type=training_instance_type,
    max_run=600,
    output_path=output_path,
)

# Launch a SageMaker Training job by passing s3 path of the training data
tabular_estimator.fit({"training": training_dataset_s3_path}, logs=True, job_name=training_job_name)