In [2]:
import sagemaker
import boto3
import os
import numpy as np
import seaborn as sns

sns.set_context("talk")

BUCKET = 'project05-capstone-vexenta'
role = sagemaker.get_execution_role()

# Train Model

Using best hyperparameters from `notebook/sagemaker/00-sm-hp-tuning.ipynb`.

In [3]:
model_output_dir = f"s3://{BUCKET}/model/hp-tuning/model.tar.gz"
input_train = f"s3://{BUCKET}/data/model-input/train/df_train_rfe.csv"
input_test = f"s3://{BUCKET}/data/model-input/test/df_test_rfe.csv"

os.environ["SM_MODEL_DIR"] = model_output_dir
os.environ["SM_CHANNEL_TRAIN"] = input_train
os.environ["SM_CHANNEL_TEST"] = input_test

In [4]:
hyperparameters = {
    '_tuning_objective_metric': '"cv f1-score"',
    'max_depth': '"30"',
    'min_samples_split': '"2"',
    'n_estimators': '"300"',
    'sagemaker_container_log_level': '20',
    'sagemaker_estimator_class_name': '"SKLearn"',
    'sagemaker_estimator_module': '"sagemaker.sklearn.estimator"',
    'sagemaker_job_name': '"sagemaker-scikit-learn-2022-01-04-00-40-29-497"',
    'sagemaker_program': '"train-rf.py"',
    'sagemaker_region': '"us-east-1"',
    'sagemaker_submit_directory': '"s3://sagemaker-us-east-1-567220378588/sagemaker-scikit-learn-2022-01-04-00-40-29-497/source/sourcedir.tar.gz"'
}

## reformat input
for key in ["min_samples_split","n_estimators","max_depth"]:
    if not isinstance(hyperparameters[key], int):
        hyperparameters[key] = int(hyperparameters[key].replace('"',''))
        
hyperparameters

{'_tuning_objective_metric': '"cv f1-score"',
 'max_depth': 30,
 'min_samples_split': 2,
 'n_estimators': 300,
 'sagemaker_container_log_level': '20',
 'sagemaker_estimator_class_name': '"SKLearn"',
 'sagemaker_estimator_module': '"sagemaker.sklearn.estimator"',
 'sagemaker_job_name': '"sagemaker-scikit-learn-2022-01-04-00-40-29-497"',
 'sagemaker_program': '"train-rf.py"',
 'sagemaker_region': '"us-east-1"',
 'sagemaker_submit_directory': '"s3://sagemaker-us-east-1-567220378588/sagemaker-scikit-learn-2022-01-04-00-40-29-497/source/sourcedir.tar.gz"'}

In [5]:
## create estimators for your HPs
from sagemaker.sklearn.estimator import SKLearn

estimator = SKLearn(
    entry_point="../../src/modelling/train-rf.py",
    role=role,
    py_version='py3',
    framework_version="0.20.0",
    instance_count=1,
    instance_type="ml.m5.2xlarge",
    hyperparameters=hyperparameters,
)

## Fit your model
estimator.fit(
    inputs={
        "train": input_train, 
        "test": input_test,
    },
    wait=True
)

2022-01-04 21:39:00 Starting - Starting the training job...
2022-01-04 21:39:14 Starting - Launching requested ML instancesProfilerReport-1641332339: InProgress
......
2022-01-04 21:40:24 Starting - Preparing the instances for training.........
2022-01-04 21:42:01 Downloading - Downloading input data
2022-01-04 21:42:01 Training - Downloading the training image...
2022-01-04 21:42:27 Training - Training image download completed. Training in progress..[34m2022-01-04 21:42:28,094 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2022-01-04 21:42:28,098 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-01-04 21:42:28,106 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2022-01-04 21:42:28,431 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-01-04 21:42:28,443 sagemaker-training-toolkit INFO     No GPUs detected (n

In [6]:
estimator.model_data

's3://sagemaker-us-east-1-567220378588/sagemaker-scikit-learn-2022-01-04-21-38-59-659/output/model.tar.gz'

# Deploy model as a Sagemaker Endpoint

In [35]:
## live endpoint: prepare preprocessing logic as functions, then fit into the endpoint
## to enable this, MUST prepare endpoint.py (with model_fn, etc.)

## batch transform? generate predictions on df_test_rfe.csv

from sagemaker.sklearn.model import SKLearnModel
from sagemaker.serializers import JSONSerializer, CSVSerializer
from sagemaker.deserializers import JSONDeserializer, CSVDeserializer

## TO DO: update with the latest model object (stores feature names in the model object)
model_location = 's3://sagemaker-us-east-1-567220378588/sagemaker-scikit-learn-2022-01-04-21-38-59-659/output/model.tar.gz'
sklearn_model = SKLearnModel(
    model_data=model_location, 
    role=role,
    entry_point='../../src/modelling/inference.py',
    py_version='py3',
    framework_version='0.20.0',
)

predictor = sklearn_model.deploy(
    initial_instance_count=1, 
    instance_type='ml.m5.large',
    serializer=JSONSerializer, # CSVSerializer,
    deserializer=JSONDeserializer, #CSVDeserializer,
)

predictor

------!

<sagemaker.sklearn.model.SKLearnPredictor at 0x7f90d94c6d10>

In [36]:
predictor

<sagemaker.sklearn.model.SKLearnPredictor at 0x7f90d94c6d10>

In [37]:
import pandas as pd

## sample input to test the endpoint
payload = {
    "sessionNo": 101,
    "startHour": 4,
    "startWeekday": 7,
    "duration": 0,
    "cCount": 2,
    "cMinPrice": 30,
    "cMaxPrice": 40,
    "cSumPrice": 70,
    "bCount": 1,
    "bMinPrice": 30,
    "bMaxPrice": 30,
    "bSumPrice": 30,
    "bStep": "?",
    "onlineStatus": "?",
    "availability": "?",
    "customerNo": 39,
    "maxVal": 200,
    "customerScore": 65,
    "accountLifetime": 30,
    "payments": 2,
    "age": 39,
    "address": 1,
    "lastOrder": 30,
}

df_input = pd.json_normalize(payload)

In [38]:
response = predictor.predict(
    df_input,
    initial_args={
        "ContentType": "text/csv"
    }
)

print(response)

TypeError: can only join an iterable

In [39]:
response = predictor.predict(
    payload,
    initial_args={
        "ContentType": "application/json"
    }
)

print(response)

TypeError: can only join an iterable

In [40]:
predictor.delete_endpoint()