In [30]:
import os
import time
import boto3
import sagemaker
import pandas as pd
## SageMaker dependencies
import sagemaker
from sagemaker import get_execution_role
from sagemaker.inputs import TrainingInput
from sagemaker.image_uris import retrieve

## Open the SageMaker session that every later cell operates in, and look up
## the execution role (it is handed to the Estimator further down).
session = sagemaker.Session()
role = get_execution_role()


# Configuring the built-in Linear Learner algorithm

First, let's retrieve the datasets from the previous notebook.

In [31]:
# Restore the dataset splits that were persisted with `%store` (per the text
# above, by the previous notebook). Each `%store -r NAME` re-creates NAME in
# this kernel's namespace; the cell fails if a name was never stored.
# NOTE(review): X_test / Y_test are not used in the cells visible here --
# presumably they are needed later for evaluation; confirm.
%store -r X_train
%store -r X_test
%store -r X_val
%store -r Y_train
%store -r Y_test
%store -r Y_val

Now we need to do one simple thing: upload the training and validation sets to S3, since the instances that SageMaker spins up will read the data from there. There is an OPTIONAL step, for performance gains, to convert the data to RecordIO format; if you are curious, check the documentation!

For Built-in algorithms there is a convention that **the first column is the target column**

In [32]:
data_dir = "data/"
# exist_ok=True keeps this cell idempotent on re-runs and avoids the
# check-then-create race of `if not os.path.exists(...)`.
os.makedirs(data_dir, exist_ok=True)

prefix = "boston-dataset"


def _export_target_first_csv(target, features, filename):
    """Write one dataset split as a headerless CSV with the target column first.

    Args:
        target: pandas object holding the label column (e.g. ``Y_train``).
        features: pandas object holding the feature columns (e.g. ``X_train``).
        filename: File name to create inside ``data_dir``.

    Returns:
        The local path of the CSV file that was written.
    """
    local_path = os.path.join(data_dir, filename)
    # Target is concatenated first; header and index are dropped so the file
    # contains only data rows.
    pd.concat([target, features], axis=1).to_csv(
        local_path, header=False, index=False
    )
    return local_path


train_csv_path = _export_target_first_csv(Y_train, X_train, "train.csv")
val_csv_path = _export_target_first_csv(Y_val, X_val, "validation.csv")

# Upload both files under the same key prefix; each call returns the S3
# location that the later cells reference.
val_location = session.upload_data(val_csv_path, key_prefix=prefix)
train_location = session.upload_data(train_csv_path, key_prefix=prefix)

Now, from within JupyterLab, we will create pointers to the data we just uploaded to S3. You could point to data in YOUR own S3 bucket as well.

In [33]:
# Both channels are plain CSV files, so they share one content type.
csv_content_type = "text/csv"

s3_input_train = TrainingInput(
    s3_data=train_location,
    content_type=csv_content_type,
)
s3_input_validation = TrainingInput(
    s3_data=val_location,
    content_type=csv_content_type,
)

## Set an Estimator

To use a built-in algorithm we only need to instantiate the Estimator class appropriately. Most of these parameters can be copy-pasted since they are standard.
The training job will spin up an instance of the given type to run the training command.

In [34]:
# Look up the Linear Learner image URI for the region this session runs in.
aws_region = session.boto_region_name
container = retrieve(framework="linear-learner", region=aws_region, version="latest")

# S3 location handed to the Estimator as its output_path:
# <default bucket>/<prefix>/output.
model_output_path = f"s3://{session.default_bucket()}/{prefix}/output"

model = sagemaker.estimator.Estimator(
    container,
    role,
    instance_count=1,
    instance_type="ml.m5.large",
    output_path=model_output_path,
    sagemaker_session=session,
)

Defaulting to the only supported framework/algorithm version: 1. Ignoring framework/algorithm version: latest.


## Configure Hyperparameters

It may be confusing, but SageMaker uses the name "hyperparameters" both for the configuration parameters of the model and for what the ML community calls hyperparameters.
This is also the point where we will later configure the automatic hyperparameter optimization jobs.

In [35]:
# Settings for the Linear Learner job: the number of feature columns is taken
# from X_train, the predictor is a regressor, and mini-batches hold 100 rows.
linear_learner_hyperparameters = {
    "feature_dim": X_train.shape[1],
    "predictor_type": "regressor",
    "mini_batch_size": 100,
}
model.set_hyperparameters(**linear_learner_hyperparameters)

## Train


Let's train the model. If we also pass the validation set to SageMaker, a number of interesting metrics will be reported in the training log.

In [36]:
# Map each input channel name to its S3 input, then block until the training
# job finishes (wait=True).
training_channels = {
    "train": s3_input_train,
    "validation": s3_input_validation,
}
model.fit(training_channels, wait=True)

2021-09-07 02:45:33 Starting - Starting the training job...
2021-09-07 02:45:35 Starting - Launching requested ML instancesProfilerReport-1630982733: InProgress
......
2021-09-07 02:46:50 Starting - Preparing the instances for training.........
2021-09-07 02:48:31 Downloading - Downloading input data...
2021-09-07 02:48:51 Training - Downloading the training image..[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
[34m[09/07/2021 02:49:13 INFO 140620455696192] Reading default configuration from /opt/amazon/lib/python3.7/site-packages/algorithm/resources/default-input.json: {'mini_batch_size': '1000', 'epochs': '15', 'feature_dim': 'auto', 'use_bias': 'true', 'binary_classifier_model_selection_criteria': 'accuracy', 'f_beta': '1.0', 'target_recall': '0.8', 'target_precision': '0.8', 'num_models': 'auto', 'num_calibration_samples': '10000000', 'init_method': 'uniform', 'init_scale': '0.07', 'init_sigma': '0.01', 'init_bia

## Download the model

In [37]:
# Fetch the artifact referenced by model.model_data into the current directory.
model_artifact_uri = model.model_data
sagemaker.s3.S3Downloader.download(
    s3_uri=model_artifact_uri,
    local_path="./",
)