In [1]:
%matplotlib inline
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
import os

## XGBoost Cloud Prediction Invocation Template

Invoke SageMaker Prediction Service

In [2]:
import boto3
import re
import sagemaker
from sagemaker import get_execution_role

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [5]:
# Bind a Predictor client to the named real-time endpoint
endpoint_name = 'xgboost-biketrain-vl'
predictor = sagemaker.predictor.Predictor(
    endpoint_name=endpoint_name,
)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [22]:
from sagemaker.serializers import CSVSerializer
from sagemaker.deserializers import JSONDeserializer, CSVDeserializer
from sagemaker.content_types import CONTENT_TYPE_CSV, CONTENT_TYPE_JSON

# Send feature rows as CSV and parse the endpoint's JSON reply into Python objects.
# In SageMaker SDK v2 the request Content-Type and Accept values are read-only
# Predictor properties derived from the serializer and deserializer
# (text/csv and application/json here), so they are not assigned separately.
predictor.serializer = CSVSerializer()
predictor.deserializer = JSONDeserializer()


In [8]:
# Load the rows from bike_test.csv and preview the first five
df_all = pd.read_csv('bike_test.csv')
df_all.head(5)

Unnamed: 0,datetime,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,year,month,day,dayofweek,hour
0,2011-01-20 00:00:00,1,0,1,1,10.66,11.365,56,26.0027,2011,1,20,3,0
1,2011-01-20 01:00:00,1,0,1,1,10.66,13.635,56,0.0,2011,1,20,3,1
2,2011-01-20 02:00:00,1,0,1,1,10.66,13.635,56,0.0,2011,1,20,3,2
3,2011-01-20 03:00:00,1,0,1,1,10.66,12.88,56,11.0014,2011,1,20,3,3
4,2011-01-20 04:00:00,1,0,1,1,10.66,12.88,56,11.0014,2011,1,20,3,4


In [9]:
# List every column except the first one
df_all.columns[1:]

Index(['season', 'holiday', 'workingday', 'weather', 'temp', 'atemp',
       'humidity', 'windspeed', 'year', 'month', 'day', 'dayofweek', 'hour'],
      dtype='object')

In [33]:
# Preview the frame without `datetime`, but do NOT drop it in place:
# - `datetime` is still needed when the predictions are exported to CSV, and
# - an in-place drop makes this cell fail on re-run and changes which columns
#   a positional slice such as df_all.columns[1:] selects.
df_all.drop(columns=['datetime']).head(4)

Unnamed: 0,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,year,month,day,dayofweek,hour
0,1,0,1,1,10.66,11.365,56,26.0027,2011,1,20,3,0
1,1,0,1,1,10.66,13.635,56,0.0,2011,1,20,3,1
2,1,0,1,1,10.66,13.635,56,0.0,2011,1,20,3,2
3,1,0,1,1,10.66,12.88,56,11.0014,2011,1,20,3,3


In [34]:
# predictor.predict needs an array-like of feature rows
# (a numpy array or a nested list such as [[19, 1], [20, 1]]).
# Drop `datetime` by name instead of slicing columns[1:]: a positional slice
# silently discards `season` when `datetime` has already been removed from
# df_all. errors='ignore' makes the cell work in either case.
arr_test = df_all.drop(columns=['datetime'], errors='ignore').values

In [35]:
# Display the feature matrix built in the previous cell
arr_test

array([[ 1.,  0.,  1., ..., 20.,  3.,  0.],
       [ 1.,  0.,  1., ..., 20.,  3.,  1.],
       [ 1.,  0.,  1., ..., 20.,  3.,  2.],
       ...,
       [ 1.,  0.,  1., ..., 31.,  0., 21.],
       [ 1.,  0.,  1., ..., 31.,  0., 22.],
       [ 1.,  0.,  1., ..., 31.,  0., 23.]])

In [36]:
# Confirm the container type of the feature matrix
type(arr_test)

numpy.ndarray

In [37]:
# (rows, columns) of the feature matrix
arr_test.shape

(6493, 13)

In [None]:
# First five feature rows
arr_test[:5]

In [None]:
# Smoke-test the endpoint with only the first two rows
result = predictor.predict(arr_test[:2])

In [None]:
# Show the deserialized response from the two-row request
result

In [None]:
# Total number of rows (and columns) that still need predictions
arr_test.shape

## Split the input data into chunks

This data set contains thousands of rows that need inference.
When communicating over the internet, it is a good idea to split the data into chunks to avoid payload-size and timeout errors.

In [41]:
# For a large number of predictions, split the input into chunks and query
# the prediction service once per chunk. np.array_split takes the number of
# chunks and accepts a row count that is not evenly divisible.
predictions = []
for arr in np.array_split(arr_test, 10):
    # The predictor is configured with JSONDeserializer, so the response is
    # already a dict of the form {'predictions': [{'score': <float>}, ...]}.
    # It is not bytes, so it must not be decoded and split as CSV text.
    result = predictor.predict(arr)
    print(arr.shape)
    predictions += [float(p['score']) for p in result['predictions']]

# Every input row must have produced exactly one score
assert len(predictions) == len(arr_test), "prediction count does not match input rows"

{'predictions': [{'score': 6.296085357666016}, {'score': -2.9307193756103516}, {'score': -7.297832489013672}, {'score': -4.428560256958008}, {'score': -4.428560256958008}, {'score': 6.0163493156433105}, {'score': 32.01003646850586}, {'score': 115.59781646728516}, {'score': 227.74151611328125}, {'score': 114.65567016601562}, {'score': 27.91819953918457}, {'score': 39.3858528137207}, {'score': 63.78301239013672}, {'score': 53.3805046081543}, {'score': 56.97462463378906}, {'score': 68.29542541503906}, {'score': 86.6563720703125}, {'score': 195.63897705078125}, {'score': 181.79391479492188}, {'score': 114.9919662475586}, {'score': 63.26693344116211}, {'score': 50.07694625854492}, {'score': 35.03616714477539}, {'score': 17.29349708557129}, {'score': 11.855822563171387}, {'score': -5.2882399559021}, {'score': -4.214561939239502}, {'score': -7.119774341583252}, {'score': -0.4179537892341614}, {'score': -1.534906029701233}, {'score': 30.245878219604492}, {'score': 99.56751251220703}, {'score':

{'predictions': [{'score': 158.73414611816406}, {'score': 240.83152770996094}, {'score': 645.865234375}, {'score': 352.0386657714844}, {'score': 216.9493865966797}, {'score': 234.4606475830078}, {'score': 315.1044006347656}, {'score': 312.9858703613281}, {'score': 318.7753601074219}, {'score': 324.5836181640625}, {'score': 436.3274841308594}, {'score': 794.279296875}, {'score': 796.0656127929688}, {'score': 580.4234008789062}, {'score': 338.7428283691406}, {'score': 329.5094909667969}, {'score': 238.6480255126953}, {'score': 183.71987915039062}, {'score': 61.75667953491211}, {'score': 34.099151611328125}, {'score': 24.01809310913086}, {'score': 21.63134765625}, {'score': 20.725187301635742}, {'score': 41.50870895385742}, {'score': 165.88453674316406}, {'score': 439.33319091796875}, {'score': 633.9158935546875}, {'score': 354.44061279296875}, {'score': 253.7874755859375}, {'score': 279.6803283691406}, {'score': 344.9086608886719}, {'score': 345.8456726074219}, {'score': 332.875793457031

In [None]:
# Number of scores collected across all chunks
len(predictions)

In [None]:
# Peek at a few of the collected scores (elements 1 through 9)
predictions[1:10]

In [None]:
# np.expm1 computes exp(x) - 1 element-wise, i.e. the inverse of np.log1p.
# NOTE(review): this presumes the model was trained on log1p(count) - confirm
# against the training notebook. Scores in the hundreds would overflow here
# and would indicate the endpoint already returns raw counts.
np.expm1(predictions)

In [None]:
# Store the predictions as the `count` column, applying np.expm1
# (exp(x) - 1, the inverse of np.log1p) to each score.
# NOTE(review): presumes a log1p-transformed training target - confirm.
df_all['count'] = np.expm1(predictions)

In [None]:
# Preview the frame with the new `count` column
df_all.head()

In [None]:
# Write one (datetime, count) row per input row.
# `datetime` may have been dropped from df_all in place by an earlier cell, in
# which case selecting it raises KeyError; fall back to re-reading that column
# from the source CSV (same row order, same default index).
if 'datetime' in df_all.columns:
    datetime_col = df_all['datetime']
else:
    datetime_col = pd.read_csv('bike_test.csv', usecols=['datetime'])['datetime']

pd.DataFrame({'datetime': datetime_col, 'count': df_all['count']}).to_csv(
    'predicted_count_cloud.csv', index=False
)

In [None]:
# Delete the endpoint to prevent unnecessary charges.
# After this call the predictor can no longer serve requests from this endpoint.
predictor.delete_endpoint()

### Clean Up

To avoid incurring unnecessary charges, use the AWS Management Console to delete the endpoints and resources that you created while running the exercises.

    1. Open the Amazon SageMaker console at https://console.aws.amazon.com/sagemaker/ and delete the following resources:

        The endpoint. 
        Deleting the endpoint also deletes the ML compute instance or instances that support it.

        Under Inference, choose Endpoints.

        Choose the endpoint that you created in the example, choose Actions, and then choose Delete.

        The endpoint configuration.

        Under Inference, choose Endpoint configurations.

        Choose the endpoint configuration that you created in the example, choose Actions, and then choose Delete.

        The model.

        Under Inference, choose Models.

        Choose the model that you created in the example, choose Actions, and then choose Delete.

        The notebook instance. 
        Before deleting the notebook instance, stop it.

        Under Notebook, choose Notebook instances.

        Choose the notebook instance that you created in the example, choose Actions, and then choose Stop. The notebook instance takes several minutes to stop. When the Status changes to Stopped, move on to the next step.

        Choose Actions, and then choose Delete.

    2. Open the Amazon S3 console at https://console.aws.amazon.com/s3/, and then delete the bucket that you created for storing model artifacts and the training dataset.

    3. Open the Amazon CloudWatch console at https://console.aws.amazon.com/cloudwatch/, and then delete all of the log groups that have names starting with /aws/sagemaker/.

Note:
Training jobs and logs cannot be deleted and are retained indefinitely.
If you plan to explore other exercises in this guide, you might want to keep some of these resources, such as your notebook instance, S3 bucket, and IAM role.