In [15]:
%matplotlib inline

import os

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from sklearn.datasets import load_boston
import sklearn.model_selection

In [16]:
import sagemaker
from sagemaker import get_execution_role
from sagemaker.amazon.amazon_estimator import get_image_uri
from sagemaker.predictor import csv_serializer

# Handle to the SageMaker session this notebook is running in. Later cells use it to upload the
# training data and pass it to the estimator.
sagemaker_session = sagemaker.Session()

# IAM role currently assigned to this notebook. The estimator built later is given this same
# role, which is sufficient for a simple use case like this one.
role = get_execution_role()

In [17]:
# Load the Boston housing dataset; later cells read its `.data`, `.feature_names` and `.target`.
# NOTE(review): `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2, so this cell
# needs an older scikit-learn in the kernel — confirm the installed version before re-running.
boston = load_boston()

In [18]:
# Imported directly (rather than reached through the `sklearn` name) so this cell still works when
# re-run after a later cell rebinds `sklearn` to the SageMaker estimator object.
from sklearn.model_selection import train_test_split

# Fixed seed so the train/validation/test partitions, and therefore the reported metric, are
# reproducible across "Restart & Run All".
SPLIT_SEED = 42

# Features keep their original column names; the target becomes a single-column frame.
X_bos_pd = pd.DataFrame(boston.data, columns=boston.feature_names)
Y_bos_pd = pd.DataFrame(boston.target)

# We split the dataset into 2/3 training and 1/3 testing sets.
X_train, X_test, Y_train, Y_test = train_test_split(
    X_bos_pd, Y_bos_pd, test_size=0.33, random_state=SPLIT_SEED)

# Then we split the training set further into 2/3 training and 1/3 validation sets.
X_train, X_val, Y_train, Y_val = train_test_split(
    X_train, Y_train, test_size=0.33, random_state=SPLIT_SEED)

In [19]:
# Local directory where the CSV files destined for S3 are written.
data_dir = './data/boston'

# `exist_ok=True` makes the cell idempotent without a separate existence check, so there is no
# window between "check" and "create" in which another process could create the directory.
os.makedirs(data_dir, exist_ok=True)

In [20]:
# Put the target in the first column, followed by the features, and write the result as a CSV
# with neither a header row nor an index column.
train_csv_path = os.path.join(data_dir, 'train.csv')
train = pd.concat([Y_train, X_train], axis=1)
train.to_csv(train_csv_path, header=False, index=False)

In [21]:
# Upload the local data directory through the SageMaker session; the key prefix is the project
# prefix followed by the local directory path.
prefix = 'sklearn-boston'
WORK_DIRECTORY = 'data/boston'

s3_key_prefix = "{}/{}".format(prefix, WORK_DIRECTORY)
train_input = sagemaker_session.upload_data(WORK_DIRECTORY, key_prefix=s3_key_prefix)

In [22]:
from sagemaker.sklearn.estimator import SKLearn

# Script handed to the estimator as its training entry point.
script_path = 'sklearn-hyperparameter-tune.py'

# NOTE(review): this assignment rebinds the name `sklearn`, hiding the scikit-learn package that
# the first cell imported. The name is kept because later cells call `sklearn.fit(...)` and
# `sklearn.deploy(...)`; renaming it would have to be done in all of those cells together.
sklearn = SKLearn(
    entry_point=script_path,
    # Pin the framework version explicitly instead of relying on the SDK default, which emits a
    # "not the latest supported version" warning. "0.20.0" is presumably the default this
    # notebook was run with — TODO confirm, or move to "0.23-1" as the SDK warning suggests.
    framework_version="0.20.0",
    train_instance_type="ml.c4.xlarge",
    role=role,
    sagemaker_session=sagemaker_session)

This is not the latest supported version. If you would like to use version 0.23-1, please add framework_version=0.23-1 to your constructor.


In [23]:
# Launch the training job, passing the uploaded data under the 'train' key.
training_channels = {'train': train_input}
sklearn.fit(training_channels)

's3_input' class will be renamed to 'TrainingInput' in SageMaker Python SDK v2.


2020-07-19 03:34:22 Starting - Starting the training job...
2020-07-19 03:34:24 Starting - Launching requested ML instances.........
2020-07-19 03:35:56 Starting - Preparing the instances for training...
2020-07-19 03:36:43 Downloading - Downloading input data...
2020-07-19 03:37:14 Training - Downloading the training image..[34m2020-07-19 03:37:28,551 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2020-07-19 03:37:28,553 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2020-07-19 03:37:28,564 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2020-07-19 03:37:28,866 sagemaker-containers INFO     Module sklearn-hyperparameter-tune does not provide a setup.py. [0m
[34mGenerating setup.py[0m
[34m2020-07-19 03:37:28,866 sagemaker-containers INFO     Generating setup.cfg[0m
[34m2020-07-19 03:37:28,866 sagemaker-containers INFO     Generating MANIFEST.in[0m
[34m20

In [24]:
# Deploy the trained model to one ml.m4.xlarge instance and keep the returned predictor.
# NOTE(review): no endpoint cleanup is visible in this section — presumably
# `predictor.delete_endpoint()` should be called once predictions are done; confirm.
predictor = sklearn.deploy(
    instance_type="ml.m4.xlarge",
    initial_instance_count=1,
)

Parameter image will be renamed to image_uri in SageMaker Python SDK v2.


---------------!

In [25]:
# Send the validation features to the predictor as a plain array rather than a DataFrame.
val_features = X_val.values
val_pred = predictor.predict(val_features)

In [26]:
from sklearn.metrics import mean_squared_error as mse

# Root of the mean squared error between the natural logs of the actual and predicted values
# on the validation split.
log_actual = np.log(Y_val)
log_predicted = np.log(val_pred)
rmsle = mse(log_actual, log_predicted) ** 0.5
print(rmsle)

0.21505855206871974
