In [3]:
import sagemaker
import boto3
import os
import numpy as np
import seaborn as sns

## Notebook-wide configuration
BUCKET = 'project05-capstone-vexenta'  # S3 bucket that the data/model paths in this notebook are built from
role = sagemaker.get_execution_role()  # execution role passed to the estimator and models below

## seaborn plotting context for any figures
sns.set_context("talk")

# Train Model

Train the model using the best hyperparameters found in `notebook/sagemaker/00-sm-hp-tuning.ipynb`.

In [18]:
## S3 locations of the model artifact and of the train/test inputs
model_output_dir = f"s3://{BUCKET}/model/hp-tuning/model.tar.gz"
input_train = f"s3://{BUCKET}/data/model-input/train/df_train_rfe.csv"
input_test = f"s3://{BUCKET}/data/model-input/test/df_test_rfe.csv"

## Expose the same locations through the SM_* environment variables of this kernel
os.environ.update(
    {
        "SM_MODEL_DIR": model_output_dir,
        "SM_CHANNEL_TRAIN": input_train,
        "SM_CHANNEL_TEST": input_test,
    }
)

In [19]:
## Hyperparameters copied from the tuning job; values arrive as quoted strings
hyperparameters = {
    '_tuning_objective_metric': '"cv f1-score"',
    'max_depth': '"30"',
    'min_samples_split': '"2"',
    'n_estimators': '"300"',
    'sagemaker_container_log_level': '20',
    'sagemaker_estimator_class_name': '"SKLearn"',
    'sagemaker_estimator_module': '"sagemaker.sklearn.estimator"',
    'sagemaker_job_name': '"sagemaker-scikit-learn-2022-01-05-01-33-08-441"',
    'sagemaker_program': '"train-rf.py"',
    'sagemaker_region': '"us-east-1"',
    'sagemaker_submit_directory': '"s3://sagemaker-us-east-1-567220378588/sagemaker-scikit-learn-2022-01-05-01-33-08-441/source/sourcedir.tar.gz"'
}

## reformat input: strip the embedded double quotes from the three model
## hyperparameters and cast them to int; values that are already int are
## skipped, so re-running the cell is harmless
hyperparameters.update(
    {
        name: int(value.replace('"', ''))
        for name, value in hyperparameters.items()
        if name in ("min_samples_split", "n_estimators", "max_depth")
        and not isinstance(value, int)
    }
)

hyperparameters

{'_tuning_objective_metric': '"cv f1-score"',
 'max_depth': 30,
 'min_samples_split': 2,
 'n_estimators': 300,
 'sagemaker_container_log_level': '20',
 'sagemaker_estimator_class_name': '"SKLearn"',
 'sagemaker_estimator_module': '"sagemaker.sklearn.estimator"',
 'sagemaker_job_name': '"sagemaker-scikit-learn-2022-01-05-01-33-08-441"',
 'sagemaker_program': '"train-rf.py"',
 'sagemaker_region': '"us-east-1"',
 'sagemaker_submit_directory': '"s3://sagemaker-us-east-1-567220378588/sagemaker-scikit-learn-2022-01-05-01-33-08-441/source/sourcedir.tar.gz"'}

In [20]:
## create estimators for your HPs
from sagemaker.sklearn.estimator import SKLearn

## Training job settings: training script, execution role, container version,
## instance size and the hyperparameters prepared above
estimator_settings = dict(
    entry_point="../../src/modelling/train-rf.py",
    role=role,
    py_version='py3',
    framework_version="0.20.0",
    instance_count=1,
    instance_type="ml.m5.2xlarge",
    hyperparameters=hyperparameters,
)
estimator = SKLearn(**estimator_settings)

## Fit your model: one input channel per dataset, blocking until the job ends
training_channels = {"train": input_train, "test": input_test}
estimator.fit(inputs=training_channels, wait=True)

2022-01-05 02:18:32 Starting - Starting the training job...
2022-01-05 02:18:56 Starting - Launching requested ML instancesProfilerReport-1641349111: InProgress
......
2022-01-05 02:19:56 Starting - Preparing the instances for training......
2022-01-05 02:21:01 Downloading - Downloading input data...
2022-01-05 02:21:17 Training - Downloading the training image....[34m2022-01-05 02:22:06,464 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2022-01-05 02:22:06,467 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-01-05 02:22:06,476 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2022-01-05 02:22:06,754 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-01-05 02:22:13,024 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-01-05 02:22:13,036 sagemaker-training-toolkit 

In [21]:
## S3 URI of the model artifact written by the training job fitted above
estimator.model_data

's3://sagemaker-us-east-1-567220378588/sagemaker-scikit-learn-2022-01-05-02-18-31-464/output/model.tar.gz'

# Deploy model as a SageMaker Endpoint

Relevant readings:
- https://github.com/aws/amazon-sagemaker-examples/blob/master/sagemaker-python-sdk/scikit_learn_randomforest/Sklearn_on_SageMaker_end2end.ipynb
- https://dev.to/aws-builders/running-custom-algorithm-in-aws-sagemaker-4jdf

In [31]:
## live endpoint: the preprocessing logic lives in functions of the inference
## entry-point script (model_fn, etc.), which is shipped with the model
from sagemaker.sklearn.model import SKLearnModel
from sagemaker.serializers import JSONSerializer, CSVSerializer
from sagemaker.deserializers import JSONDeserializer, CSVDeserializer

## Model artifact to serve, pinned by its S3 URI
model_location = 's3://sagemaker-us-east-1-567220378588/sagemaker-scikit-learn-2022-01-05-02-18-31-464/output/model.tar.gz'

model_settings = dict(
    model_data=model_location,
    role=role,
    entry_point='../../src/modelling/inference.py',
    py_version='py3',
    framework_version='0.20.0',
)
sklearn_model = SKLearnModel(**model_settings)

## NOTE: no serializer/deserializer is passed to deploy(), so the returned
## predictor keeps whatever defaults the SDK assigns
predictor = sklearn_model.deploy(initial_instance_count=1, instance_type='ml.m5.large')

predictor

------!

<sagemaker.sklearn.model.SKLearnPredictor at 0x7f8bcfc5fd50>

In [32]:
## Display the predictor object returned by the deploy cell
predictor

<sagemaker.sklearn.model.SKLearnPredictor at 0x7f8bcfc5fd50>

In [33]:
import pandas as pd

## sample input to test the endpoint (a single record; "?" values are sent as-is)
payload = dict(
    # session-level fields
    sessionNo=101, startHour=4, startWeekday=7, duration=0,
    # c* fields
    cCount=2, cMinPrice=30, cMaxPrice=40, cSumPrice=70,
    # b* fields
    bCount=1, bMinPrice=30, bMaxPrice=30, bSumPrice=30, bStep="?",
    # status fields
    onlineStatus="?", availability="?",
    # customer-level fields
    customerNo=39, maxVal=200, customerScore=65, accountLifetime=30,
    payments=2, age=39, address=1, lastOrder=30,
)

## one-row DataFrame, columns in the same order as the payload keys
df_input = pd.json_normalize(payload)

In [34]:
## Send the sample row to the endpoint as CSV.
##
## Handing the DataFrame straight to predict() and only overriding the
## ContentType header does not change how the body is encoded: the predictor
## was deployed without a serializer, so the body is produced by the SDK's
## default serializer for SKLearnPredictor (NumPy binary), not CSV. The
## endpoint then receives a "text/csv" request it cannot parse, which is the
## most likely cause of the 500 ModelError. Serialize explicitly instead.
predictor.serializer = CSVSerializer()  # imported in the deploy cell; also sets ContentType to text/csv

## header=True matches the boto3 invoke_endpoint call in this notebook
## TODO confirm that inference.py expects a header row in the CSV body
csv_body = df_input.to_csv(header=True, index=False)
response = predictor.predict(csv_body)

print(response)

ModelError: An error occurred (ModelError) when calling the InvokeEndpoint operation: Received server error (500) from primary with message "<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<title>500 Internal Server Error</title>
<h1>Internal Server Error</h1>
<p>The server encountered an internal error and was unable to complete your request. Either the server is overloaded or there is an error in the application.</p>
". See https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#logEventViewer:group=/aws/sagemaker/Endpoints/sagemaker-scikit-learn-2022-01-05-08-45-32-539 in account 567220378588 for more information.

In [None]:
## Call the endpoint through the low-level runtime client instead of the
## SDK predictor (boto3 is imported at the top of the notebook)
runtime = boto3.client("sagemaker-runtime")

# csv serialization: the body is the one-row sample frame, header included
# TODO confirm that inference.py expects a header row in the CSV body
response = runtime.invoke_endpoint(
    # SDK v2 renamed Predictor.endpoint to endpoint_name; the old attribute is deprecated
    EndpointName=predictor.endpoint_name,
    Body=df_input.to_csv(header=True, index=False).encode("utf-8"),
    ContentType="text/csv",
)

print(response["Body"].read())

In [None]:
## Same sample record as above, wrapped in a list for the JSON request
payload2 = [
    {
        "sessionNo": 101,
        "startHour": 4,
        "startWeekday": 7,
        "duration": 0,
        "cCount": 2,
        "cMinPrice": 30,
        "cMaxPrice": 40,
        "cSumPrice": 70,
        "bCount": 1,
        "bMinPrice": 30,
        "bMaxPrice": 30,
        "bSumPrice": 30,
        "bStep": "?",
        "onlineStatus": "?",
        "availability": "?",
        "customerNo": 39,
        "maxVal": 200,
        "customerScore": 65,
        "accountLifetime": 30,
        "payments": 2,
        "age": 39,
        "address": 1,
        "lastOrder": 30,
    }
]

## Overriding only the ContentType header leaves the body encoded by the
## predictor's default serializer, so the request would claim JSON without
## being JSON. Set a JSON serializer so body and ContentType agree.
## TODO confirm that inference.py accepts application/json input
predictor.serializer = JSONSerializer()  # imported in the deploy cell; sets ContentType to application/json
response = predictor.predict(payload2)

print(response)
## CloudWatch logs, useful when the endpoint returns an error:
## https://console.aws.amazon.com/cloudwatch/home?region=us-east-1#logsV2:log-groups/log-group/$252Faws$252Fsagemaker$252FEndpoints$252Fsagemaker-scikit-learn-2022-01-05-02-00-07-418/log-events/AllTraffic$252Fi-05d2b11a49c9594a4

In [30]:
## Delete the endpoint behind `predictor` once endpoint testing is finished
predictor.delete_endpoint()

# Batch transform

In [None]:
## batch transform
from sagemaker.sklearn.model import SKLearnModel

## Input file, output prefix and the trained model artifact (all on S3)
batch_transform_input_path = f"s3://{BUCKET}/data/model-input/test/df_test_rfe.csv"
batch_transform_output_path = f"s3://{BUCKET}/data/model-output/batch-transform/"
model_location = 's3://sagemaker-us-east-1-567220378588/sagemaker-scikit-learn-2022-01-05-02-18-31-464/output/model.tar.gz'

## Model definition: same artifact and inference script as the live endpoint
model_settings = dict(
    model_data=model_location,
    role=role,
    entry_point='../../src/modelling/inference.py',
    py_version='py3',
    framework_version='0.20.0',
)
sklearn_model = SKLearnModel(**model_settings)

## Transformer writing its results under the output prefix
transformer = sklearn_model.transformer(
    instance_count=1,
    instance_type='ml.m5.large',
    output_path=batch_transform_output_path,
)

## Submit the CSV input split line by line, then block until the job completes
transform_request = dict(
    data=batch_transform_input_path,
    data_type='S3Prefix',
    content_type='text/csv',
    split_type='Line',
)
transformer.transform(**transform_request)

transformer.wait()

................................[34mProcessing /opt/ml/code[0m
[34mBuilding wheels for collected packages: inference
  Building wheel for inference (setup.py): started
  Building wheel for inference (setup.py): finished with status 'done'
  Created wheel for inference: filename=inference-1.0.0-py2.py3-none-any.whl size=6515 sha256=262fce414250517e427628a0e1a122456d79c68505fae50b78266cd7b62b0cb7
  Stored in directory: /tmp/pip-ephem-wheel-cache-j0pd5743/wheels/3e/0f/51/2f1df833dd0412c1bc2f5ee56baac195b5be563353d111dca6[0m
[34mSuccessfully built inference[0m
[34mInstalling collected packages: inference[0m
[34mSuccessfully installed inference-1.0.0[0m
  import imp[0m
[34m[2022-01-05 09:17:08 +0000] [31] [INFO] Starting gunicorn 20.1.0[0m
[34m[2022-01-05 09:17:08 +0000] [31] [INFO] Listening at: unix:/tmp/gunicorn.sock (31)[0m
[34m[2022-01-05 09:17:08 +0000] [31] [INFO] Using worker: gevent[0m
[34m[2022-01-05 09:17:08 +0000] [34] [INFO] Booting worker with pid: 34[0m
[3