# Deploy and Validate the Linear Learner

In [1]:
import os
import time
import boto3
import sagemaker
import pandas as pd
## SageMaker dependencies
import sagemaker
from sagemaker import get_execution_role
from sagemaker.inputs import TrainingInput
from sagemaker.image_uris import retrieve

## This instantiates a SageMaker session that we will be operating in.
session = sagemaker.Session()
role = sagemaker.get_execution_role()

At first we will do the same as before, this will simulate a scenario where we will test the model just trained on SageMaker.

If you are curious how to deploy from an already trained job, we will do that on the next lab.

In [2]:
%store -r X_train
%store -r X_test
%store -r X_val
%store -r Y_train
%store -r Y_test
%store -r Y_val

In [3]:
data_dir = "data/"
if not os.path.exists(data_dir):
    os.makedirs(data_dir)

prefix = "boston-dataset"
pd.concat([Y_train, X_train], axis=1).to_csv(
    os.path.join(data_dir, "train.csv"), header=False, index=False
)
pd.concat([Y_val, X_val], axis=1).to_csv(
    os.path.join(data_dir, "validation.csv"), header=False, index=False
)

val_location = session.upload_data(os.path.join(data_dir, "validation.csv"), key_prefix=prefix)
train_location = session.upload_data(os.path.join(data_dir, "train.csv"), key_prefix=prefix)

In [4]:
s3_input_train = TrainingInput(s3_data=train_location, content_type="text/csv")
s3_input_validation = TrainingInput(s3_data=val_location, content_type="text/csv")

In [5]:
container = retrieve(framework='linear-learner', region=session.boto_region_name, version="latest")

model = sagemaker.estimator.Estimator(
    container,
    role,
    instance_count=1,
    instance_type="ml.m5.large",
    output_path="s3://{}/{}/output".format(session.default_bucket(), prefix),
    sagemaker_session=session,
)

Defaulting to the only supported framework/algorithm version: 1. Ignoring framework/algorithm version: latest.


In [6]:
model.set_hyperparameters(
    feature_dim=X_train.shape[1], predictor_type="regressor", mini_batch_size=100
)

In [7]:
model.fit({"train": s3_input_train, "validation": s3_input_validation}, wait=True)

2021-09-07 11:19:04 Starting - Starting the training job...
2021-09-07 11:19:30 Starting - Launching requested ML instancesProfilerReport-1631013544: InProgress
......
2021-09-07 11:20:30 Starting - Preparing the instances for training............
2021-09-07 11:22:30 Downloading - Downloading input data...
2021-09-07 11:23:00 Training - Training image download completed. Training in progress..[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
[34m[09/07/2021 11:23:05 INFO 140674731267904] Reading default configuration from /opt/amazon/lib/python3.7/site-packages/algorithm/resources/default-input.json: {'mini_batch_size': '1000', 'epochs': '15', 'feature_dim': 'auto', 'use_bias': 'true', 'binary_classifier_model_selection_criteria': 'accuracy', 'f_beta': '1.0', 'target_recall': '0.8', 'target_precision': '0.8', 'num_models': 'auto', 'num_calibration_samples': '10000000', 'init_method': 'uniform', 'init_scale': '0.07', 'in

## Deploy the model


To deploy from the same notebook instance, we can just use the deploy method on the model interface.

This will:

- Register the trained model in SageMaker (check it!)
- Create an endpoint configuration with the instance specification we just sent
- Create the real-time endpoint


In [8]:
linear_predictor = model.deploy(initial_instance_count=1, instance_type="ml.m4.xlarge")

-----------------!

## Test the endpoint

Now we have an endpoint to call. There are multiple ways to interact with a deployed model, but the mains are:

- Using the Sagemaker SDK (through the model interface as we will do now or with the sagemaker runtime a the next lab)
- Through a request (lab 7)

An endpoint in the very end is just a load balancing that redirects your request to the model, runs it, and returns the prediction. It also sends metrics to cloudwatch for monitoring and autoscaling.

In [10]:
from sagemaker.serializers import CSVSerializer
from sagemaker.deserializers import JSONDeserializer

linear_predictor.serializer = CSVSerializer()
linear_predictor.deserializer = JSONDeserializer()


To interact with the model interface in the SDK, use the predictor.predict method. The predictor is the output of the deploy method on the model.

In [38]:

test_set = pd.concat([X_test, Y_test], axis=1)

def get_predictions(x):
    row = x.values
    return linear_predictor.predict(row[1:], initial_args={"ContentType": "text/csv"})['predictions'][0]['score']
    
test_set['predictions'] = test_set.apply(get_predictions, axis=1)  # This could be done all at once too!


[-0.34810887 -0.51359612  0.52386114  3.54876739  0.11542494 -0.67306156
 -0.3133354  -0.40632946 -0.1364601  -0.65862219 -0.91322502  0.33189817
  0.31289493 -0.03083003]
{'predictions': [{'score': -0.4846033453941345}]}


In [41]:
test_set

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE,predictions
313,-0.347407,0.982243,-0.638071,-0.281788,-0.936577,-0.940181,-1.298120,1.074211,-0.224991,-0.480898,-0.677403,0.358558,0.139964,-0.731324,-0.733373
249,-0.267329,0.341169,-0.948216,-0.281788,0.350037,2.862596,0.079151,-0.696052,-0.136460,-0.734790,-2.516815,0.151400,-0.659040,2.985812,-0.738268
162,-0.346797,-0.513596,-0.936344,-0.281788,-0.259955,-0.046359,0.307507,-0.320147,-0.136460,-0.531676,-0.818896,0.393084,-0.843179,0.116048,-0.210224
293,-0.235172,-0.513596,-0.068234,-0.281788,0.059118,-0.329623,-0.427513,-0.604289,-0.158593,-0.480898,0.030063,0.421491,-0.892817,-0.166409,-0.387766
371,0.709822,-0.513596,1.148603,-0.281788,1.692020,-0.499875,1.088913,-0.925564,0.284063,1.816825,0.879023,-1.259367,0.663560,-1.058974,1.452641
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49,-0.346896,0.383908,-0.700396,-0.281788,-0.926254,0.241307,-1.558589,1.147219,-0.158593,-0.868083,-0.724567,0.421491,-1.004901,0.161241,-1.099835
257,-0.340461,1.195935,-0.586132,-0.281788,-0.851178,0.744724,-0.784319,0.078373,-0.158593,-0.798263,-0.347252,0.421491,-1.373180,0.952121,-0.922804
124,-0.221398,-0.513596,1.711020,-0.281788,0.809877,-1.041451,1.067504,-0.881381,-0.158593,0.363293,1.350667,0.421491,1.086280,-1.047676,0.696246
80,-0.347352,-0.513596,-0.871051,-0.281788,-0.832409,0.062250,-0.598780,0.293241,-0.180726,-0.842694,0.077228,0.421491,-0.309976,0.036960,-0.676379


Let's calculate the error on the predictions. It is very difficult to measure how good a regression model is, you are usually happy comparing how far the predictions are.

In [44]:
import numpy as np
np.sum((test_set['PRICE'] - test_set['predictions'])**2)

390.00436487470466

## Delete the model

Never forget to delete your resources once you are done!

In [45]:
linear_predictor.delete_model()
linear_predictor.delete_endpoint()

