In [1]:
import sagemaker
import boto3
import os
import numpy as np
import seaborn as sns

# Execution role handed to the SageMaker estimator and model created further down
role = sagemaker.get_execution_role()

# S3 bucket from which the data / model paths in this notebook are built
BUCKET = 'project05-capstone-vexenta'

# Use seaborn's "talk" plotting context
sns.set_context("talk")

In [2]:
# S3 URIs for the train/test CSV inputs and for the model artifact path
input_train = f"s3://{BUCKET}/data/model-input/train/df_train_rfe.csv"
input_test = f"s3://{BUCKET}/data/model-input/test/df_test_rfe.csv"
model_output_dir = f"s3://{BUCKET}/model/hp-tuning/model.tar.gz"

# Expose the same locations through the SM_* environment variables of this process
os.environ.update(
    {
        "SM_MODEL_DIR": model_output_dir,
        "SM_CHANNEL_TRAIN": input_train,
        "SM_CHANNEL_TEST": input_test,
    }
)

In [3]:
# Hyperparameter mapping with JSON-quoted string values
# (e.g. '"30"' is the text "30" wrapped in double quotes).
hyperparameters = {
    '_tuning_objective_metric': '"cv f1-score"',
    'max_depth': '"30"',
    'min_samples_split': '"2"',
    'n_estimators': '"300"',
    'sagemaker_container_log_level': '20',
    'sagemaker_estimator_class_name': '"SKLearn"',
    'sagemaker_estimator_module': '"sagemaker.sklearn.estimator"',
    'sagemaker_job_name': '"sagemaker-scikit-learn-2022-01-04-00-40-29-497"',
    'sagemaker_program': '"train-rf.py"',
    'sagemaker_region': '"us-east-1"',
    'sagemaker_submit_directory': '"s3://sagemaker-us-east-1-567220378588/sagemaker-scikit-learn-2022-01-04-00-40-29-497/source/sourcedir.tar.gz"'
}

## Convert the three numeric settings from quoted strings to ints.
## Entries that are already ints are skipped, so re-running the cell is harmless.
hyperparameters.update(
    {
        name: int(hyperparameters[name].replace('"', ''))
        for name in ("min_samples_split", "n_estimators", "max_depth")
        if not isinstance(hyperparameters[name], int)
    }
)

hyperparameters

{'_tuning_objective_metric': '"cv f1-score"',
 'max_depth': 30,
 'min_samples_split': 2,
 'n_estimators': 300,
 'sagemaker_container_log_level': '20',
 'sagemaker_estimator_class_name': '"SKLearn"',
 'sagemaker_estimator_module': '"sagemaker.sklearn.estimator"',
 'sagemaker_job_name': '"sagemaker-scikit-learn-2022-01-04-00-40-29-497"',
 'sagemaker_program': '"train-rf.py"',
 'sagemaker_region': '"us-east-1"',
 'sagemaker_submit_directory': '"s3://sagemaker-us-east-1-567220378588/sagemaker-scikit-learn-2022-01-04-00-40-29-497/source/sourcedir.tar.gz"'}

In [5]:
## Build the scikit-learn estimator that runs the training script with the HPs above
from sagemaker.sklearn.estimator import SKLearn

estimator = SKLearn(
    entry_point="../../src/modelling/train-rf.py",
    framework_version="0.20.0",
    py_version="py3",
    role=role,
    instance_type="ml.m5.2xlarge",
    instance_count=1,
    hyperparameters=hyperparameters,
)

## Start the training job on the train/test channels; wait=True keeps the cell
## attached to the job, whose log is streamed into the cell output
estimator.fit(inputs=dict(train=input_train, test=input_test), wait=True)

2022-01-04 01:48:38 Starting - Starting the training job...
2022-01-04 01:49:01 Starting - Launching requested ML instancesProfilerReport-1641260918: InProgress
......
2022-01-04 01:50:01 Starting - Preparing the instances for training......
2022-01-04 01:51:06 Downloading - Downloading input data...
2022-01-04 01:51:35 Training - Downloading the training image...
2022-01-04 01:52:02 Training - Training image download completed. Training in progress.[34m2022-01-04 01:51:50,680 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2022-01-04 01:51:50,683 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-01-04 01:51:50,691 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2022-01-04 01:51:51,040 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-01-04 01:51:51,051 sagemaker-training-toolkit INFO     No GPUs detected (no

In [6]:
# Show the S3 URI of the model.tar.gz artifact recorded on the fitted estimator
estimator.model_data

's3://sagemaker-us-east-1-567220378588/sagemaker-scikit-learn-2022-01-04-01-48-37-966/output/model.tar.gz'

In [8]:
# ## Deploy an endpoint straight from the estimator -- this attempt failed: the entry-point script must provide model_fn
# predictor = estimator.deploy(
#     initial_instance_count=1,
#     instance_type='ml.m5.large'
# )

In [11]:
## Live endpoint: the preprocessing logic has to be packaged as functions in the
## inference entry point (model_fn, etc.) before the model can be served;
## here that script is ../../src/modelling/inference.py.
## Open question: use batch transform instead to generate predictions on df_test_rfe.csv?

from sagemaker.sklearn.model import SKLearnModel

# Take the artifact URI from the estimator fitted above instead of a pasted literal:
# every training run writes to a new, timestamped S3 prefix, so a hard-coded path
# would silently deploy a stale model after the notebook is re-run.
# To serve an artifact from an earlier job, assign its S3 URI here instead.
model_location = estimator.model_data

# Wrap the trained artifact together with the inference script
sklearn_model = SKLearnModel(
    model_data=model_location,
    role=role,
    entry_point='../../src/modelling/inference.py',
    py_version='py3',
    framework_version='0.20.0',
)

# Create a real-time endpoint backed by a single ml.m5.large instance
predictor = sklearn_model.deploy(
    initial_instance_count=1,
    instance_type='ml.m5.large'
)

predictor

-----!

<sagemaker.sklearn.model.SKLearnPredictor at 0x7fb4a13c3850>

In [12]:
# Re-display the predictor object returned by sklearn_model.deploy()
predictor

<sagemaker.sklearn.model.SKLearnPredictor at 0x7fb4a13c3850>

In [13]:
# Delete the endpoint behind this predictor once it is no longer needed
predictor.delete_endpoint()