In [None]:
# Model development: convert the model to use a built-in AWS SageMaker algorithm (LightGBM) for deployment.
# Reference: https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/lightgbm_catboost_tabular/Amazon_Tabular_Classification_LightGBM_CatBoost.ipynb

In [2]:
!pip install sagemaker ipywidgets --upgrade --quiet

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
pytest-astropy 0.8.0 requires pytest-cov>=2.0, which is not installed.
pytest-astropy 0.8.0 requires pytest-filter-subpackage>=0.1, which is not installed.
sparkmagic 0.20.4 requires nest-asyncio==1.5.5, but you have nest-asyncio 1.5.6 which is incompatible.
awscli 1.27.103 requires rsa<4.8,>=3.1.2, but you have rsa 4.9 which is incompatible.[0m[31m
[0m

In [3]:
import sagemaker, boto3, json
from sagemaker import get_execution_role

# SageMaker session; used further down to look up the default S3 bucket.
sess = sagemaker.Session()
# Region configured on the default boto3 session.
aws_region = boto3.Session().region_name
# IAM execution role of this notebook; handed to the Estimator below.
aws_role = get_execution_role()

In [12]:
from sagemaker import image_uris, model_uris, script_uris


# Identify the built-in model to fine-tune: LightGBM classification,
# any available version ("*"), using the artifacts for the training scope.
train_model_id = "lightgbm-classification-model"
train_model_version = "*"
train_scope = "training"

# Instance type the training job runs on.
training_instance_type = "ml.m5.xlarge"

In [5]:
# Look up the training container image for the selected model.
# `region` and `framework` are left as None, exactly as in the reference notebook.
train_image_uri = image_uris.retrieve(
    model_id=train_model_id,
    model_version=train_model_version,
    image_scope=train_scope,
    instance_type=training_instance_type,
    region=None,
    framework=None,
)

In [6]:
# Look up the location of the training script bundle for the selected model.
train_source_uri = script_uris.retrieve(
    model_id=train_model_id,
    model_version=train_model_version,
    script_scope=train_scope,
)

In [7]:
# Look up the pre-trained model artifact that fine-tuning starts from.
train_model_uri = model_uris.retrieve(
    model_id=train_model_id,
    model_version=train_model_version,
    model_scope=train_scope,
)

In [8]:
# Both the training data and the job output live in the session's default bucket.
training_data_bucket = sess.default_bucket()
output_bucket = sess.default_bucket()

# Key prefixes inside that bucket.
training_data_prefix = "capstone/train/"
output_prefix = "capstone-training"

# Full S3 URIs: the training channel input and the training job's output location.
training_dataset_s3_path = f"s3://{training_data_bucket}/{training_data_prefix}"
s3_output_location = f"s3://{output_bucket}/{output_prefix}/output"

In [29]:
# Import the module under an alias: the dict assigned below is also called
# `hyperparameters`, and without the alias it would overwrite the module
# binding, leaving the module's helpers unreachable from later cells.
from sagemaker import hyperparameters as jumpstart_hyperparameters

# Retrieve the default hyper-parameters for fine-tuning the model
hyperparameters = jumpstart_hyperparameters.retrieve_default(
    model_id=train_model_id, model_version=train_model_version
)
# LightGBM equivalents of subsample, colsample_bytree and min_child_weight
# (these names are documented LightGBM parameter aliases):
#   subsample        -> bagging_fraction (only active when bagging_freq is non-zero)
#   colsample_bytree -> feature_fraction
#   min_child_weight -> min_sum_hessian_in_leaf (not among the defaults returned
#                       when this notebook was last run)

print(hyperparameters)

{'num_boost_round': '5000', 'early_stopping_rounds': '30', 'metric': 'auto', 'learning_rate': '0.009', 'num_leaves': '67', 'feature_fraction': '0.74', 'bagging_fraction': '0.53', 'bagging_freq': '5', 'max_depth': '11', 'min_data_in_leaf': '26', 'max_delta_step': '0.0', 'lambda_l1': '0.0', 'lambda_l2': '0.0', 'boosting': 'gbdt', 'min_gain_to_split': '0.0', 'scale_pos_weight': '1.0', 'tree_learner': 'serial', 'feature_fraction_bynode': '1.0', 'is_unbalance': 'False', 'max_bin': '255', 'num_threads': '0', 'verbosity': '1', 'use_dask': 'False'}


In [30]:
from sagemaker.estimator import Estimator
from sagemaker.utils import name_from_base

# Timestamped, unique name for the training job.
# NOTE: name_from_base trims long bases to fit the name length limit, which is
# why the logged job name ends in "classification--<timestamp>" and the
# "-training" suffix is lost.
training_job_name = name_from_base(f"capstone-final-{train_model_id}-training")

# Describe the training job; nothing is launched until .fit() is called.
tabular_estimator = Estimator(
    # What to run: container image, script bundle, starting model, hyper-parameters.
    image_uri=train_image_uri,
    source_dir=train_source_uri,
    entry_point="transfer_learning.py",
    model_uri=train_model_uri,
    hyperparameters=hyperparameters,
    # How to run it: execution role and compute.
    role=aws_role,
    instance_type=training_instance_type,
    instance_count=1,
    max_run=200,  # maximum training time, in seconds
    # Where the job output is written.
    output_path=s3_output_location,
)

In [31]:
# Launch the training job, feeding the S3 dataset to the "training" channel
# and streaming the job logs into the cell output.
tabular_estimator.fit(
    {"training": training_dataset_s3_path},
    job_name=training_job_name,
    logs=True,
)

INFO:sagemaker:Creating training-job with name: capstone-final-lightgbm-classification--2023-04-02-21-31-29-192


2023-04-02 21:31:31 Starting - Starting the training job...
2023-04-02 21:31:47 Starting - Preparing the instances for training...
2023-04-02 21:32:28 Downloading - Downloading input data...
2023-04-02 21:32:48 Training - Downloading the training image...
2023-04-02 21:33:19 Training - Training image download completed. Training in progress.[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2023-04-02 21:33:25,210 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2023-04-02 21:33:25,211 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2023-04-02 21:33:25,220 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2023-04-02 21:33:25,221 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[34m2023-04-02 21:33:25,589 sagemaker-training-toolkit

In [32]:
# Instance type for inference; used both to look up the inference image and to deploy the endpoint.
inference_instance_type = "ml.m5.large"

In [33]:
# Look up the inference container image for the same model and version.
# `region` and `framework` are left as None, exactly as for the training image.
deploy_image_uri = image_uris.retrieve(
    model_id=train_model_id,
    model_version=train_model_version,
    image_scope="inference",
    instance_type=inference_instance_type,
    region=None,
    framework=None,
)

In [34]:
# Look up the location of the inference script bundle for the same model and version.
deploy_source_uri = script_uris.retrieve(
    model_id=train_model_id,
    model_version=train_model_version,
    script_scope="inference",
)

In [35]:
# Timestamped, unique endpoint name. NOTE(review): the base is long enough that
# name_from_base trims it (the logged endpoint name reads "...classificatio-<timestamp>");
# consider a shorter base if the full model id should remain readable.
endpoint_name = name_from_base(f"capstone-example-{train_model_id}-")

In [36]:
# Deploy the fine-tuned model behind a real-time endpoint.
# The endpoint keeps running (and billing) until it is explicitly deleted.
predictor = tabular_estimator.deploy(
    # What to serve: inference container and script bundle.
    image_uri=deploy_image_uri,
    source_dir=deploy_source_uri,
    entry_point="inference.py",
    # Where to serve it.
    endpoint_name=endpoint_name,
    instance_type=inference_instance_type,
    initial_instance_count=1,
)

INFO:sagemaker:Creating model with name: sagemaker-jumpstart-2023-04-02-21-34-58-633
INFO:sagemaker:Creating endpoint-config with name capstone-example-lightgbm-classificatio-2023-04-02-21-34-58-569
INFO:sagemaker:Creating endpoint with name capstone-example-lightgbm-classificatio-2023-04-02-21-34-58-569


-----!