In [1]:
import time
import numpy as np
import pandas as pd
import json
import matplotlib.pyplot as plt
import datetime


# This code is derived from AWS SageMaker Samples:
# https://github.com/awslabs/amazon-sagemaker-examples/tree/master/introduction_to_amazon_algorithms/deepar_electricity
# https://github.com/awslabs/amazon-sagemaker-examples/tree/master/introduction_to_amazon_algorithms/deepar_synthetic

In [2]:
import boto3
import sagemaker
from sagemaker import get_execution_role

## Package versions used in this project

| **Package** | Version |
|-------------|---------|
| numpy | 1.14.6 |
| numy-base | 1.14.6 |
| pandas | 0.24.2 |
| jsonschema | 2.6.0 |
| matplotlib | 3.1.3 |
| matplotlib-base | 3.1.3 |
| boto3 | 1.12.0 |
| sagemaker | 1.55.4 |

# Creating Base Name Conventions

In [3]:
# Set a good base job name when building different models
# This helps with identifying training models and endpoints
with_categories = False
if with_categories:
    base_job_name = 'deepar-biketrain-with-categories'
else:
    base_job_name = 'deepar-biketrain-no-categories'

In [4]:
# Specify Your Bucket Name
bucket = 's3-2-ml-sagemaker'
prefix = 'deepar/bikerental'

# This structure allows multiple training and test files for model development and testing
if with_categories:
    s3_data_path = "{}/{}/data_with_categories".format(bucket, prefix)
else:
    s3_data_path = "{}/{}/data".format(bucket, prefix)

s3_output_path = "{}/{}/ouput".format(bucket, prefix)

In [5]:
s3_data_path, s3_output_path # Review the directories

('s3-2-ml-sagemaker/deepar/bikerental/data',
 's3-2-ml-sagemaker/deepar/bikerental/ouput')

## Creating Output File Through Boto3

In [6]:
def write_to_s3(filename, bucket, key):
    with open(filename, 'rb') as f: # read in binary mode
        return boto3.Session().resource('s3').Bucket(bucket).Object(key).upload_fileobj(f)

## Upload Training and Test Files to S3 bucket

Using JSON files created in the data preparation phase

In [7]:
if with_categories:
    write_to_s3('train_with_categories.json',bucket,'deepar/bikerental/data_with_categories/train/train_with_categories.json')
    write_to_s3('test_with_categories.json',bucket,'deepar/bikerental/data_with_categories/test/test_with_categories.json')
else:
    write_to_s3('train.json',bucket,'deepar/bikerental/data/train/train.json')
    write_to_s3('test.json',bucket,'deepar/bikerental/data/test/test.json')

In [8]:
sagemaker_session = sagemaker.Session()
role = get_execution_role()

## Use SageMaker's Estimator
Ref: [SageMaker - Amazon Estimators 1.55.4](https://sagemaker.readthedocs.io/en/stable/sagemaker.amazon.amazon_estimator.html)

In [11]:
from sagemaker.amazon.amazon_estimator import get_image_uri
image_name = get_image_uri(boto3.Session().region_name, 'forecasting-deepar')

In [12]:
image_name # data review - verify Docker image for the region

'566113047672.dkr.ecr.us-east-2.amazonaws.com/forecasting-deepar:1'

In [13]:
freq='H' # Timeseries consists Hourly Data and we need to predict hourly rental count

# how far in the future predictions can be made
# 12 days worth of hourly forecast 
prediction_length = 288 

# aws recommends setting context same as prediction length as a starting point. 
# This controls how far in the past the network can see
context_length = 288

In [14]:
# Check Billing Tiers
# https://aws.amazon.com/sagemaker/pricing/
# If you are outside of free-tier, you can also use ml.m5.xlarge  (newer generation instance)
# In this example, I am using ml.m5.xlarge for training
estimator = sagemaker.estimator.Estimator(
    sagemaker_session=sagemaker_session,
    image_name=image_name,
    role=role,
    train_instance_count=1,
    train_instance_type='ml.m5.xlarge',
    base_job_name=base_job_name,
    output_path="s3://" + s3_output_path
)

In [15]:
freq, context_length, prediction_length

('H', 288, 288)

## Model Hyperparameters

Ref: [SageMaker DeepAr Hyperparameters](https://docs.aws.amazon.com/sagemaker/latest/dg/deepar_hyperparameters.html)

Notes: Adjusted Hyperparameters
1. time frequency set to H (freq)
1. epochs 400
1. early stopping patience 40, stop after X epochs, if no improvement
1. mini-batch set to 128
1. learning rate 5E-4, 0.0005
1. context length set to 288 (string of context_length)
1. prediction length set to 288 (string of prediction_length)
1. cardinality : auto for data tht has categories or *nothing*. This is an array specifying the number of categories (groups) per categorical feature.

In [16]:
hyperparameters = {
    "time_freq": freq,
    "epochs": "400",
    "early_stopping_patience": "40",
    "mini_batch_size": "128",
    "learning_rate": "5E-4",
    "context_length": str(context_length),
    "prediction_length": str(prediction_length),
    "cardinality" : "auto" if with_categories else ''
}

In [17]:
hyperparameters # verify the settings

{'time_freq': 'H',
 'epochs': '400',
 'early_stopping_patience': '40',
 'mini_batch_size': '128',
 'learning_rate': '5E-4',
 'context_length': '288',
 'prediction_length': '288',
 'cardinality': ''}

In [18]:
estimator.set_hyperparameters(**hyperparameters)

### Use Referenced Training and Testing Paths

In [19]:
# SageMaker will use all the files available to it, in JSON format
data_channel = {
    "train": "s3://{}/train/".format(s3_data_path),
    "test": "s3://{}/test/".format(s3_data_path)
}

# Train The Model

In [21]:
# This will takes around 35 minutes to train with m4.xlarge instance
estimator.fit(inputs=data_channel)

2020-04-25 03:26:35 Starting - Starting the training job...
2020-04-25 03:26:37 Starting - Launching requested ML instances...
2020-04-25 03:27:33 Starting - Preparing the instances for training......
2020-04-25 03:28:23 Downloading - Downloading input data
2020-04-25 03:28:23 Training - Downloading the training image.....[34mArguments: train[0m
[34m[04/25/2020 03:29:17 INFO 140327940233024] Reading default configuration from /opt/amazon/lib/python2.7/site-packages/algorithm/resources/default-input.json: {u'num_dynamic_feat': u'auto', u'dropout_rate': u'0.10', u'mini_batch_size': u'128', u'test_quantiles': u'[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]', u'_tuning_objective_metric': u'', u'_num_gpus': u'auto', u'num_eval_samples': u'100', u'learning_rate': u'0.001', u'num_cells': u'40', u'num_layers': u'2', u'embedding_dimension': u'10', u'_kvstore': u'auto', u'_num_kv_servers': u'auto', u'cardinality': u'auto', u'likelihood': u'student-t', u'early_stopping_patience': u''}[0m
[34

## Get Job Name and Create End Point

In [22]:
job_name = estimator.latest_training_job.name

# Hard code name for now as we stopped the notebook.  
# If you do this in a single sitting, you don't need to hard code
# job_name = 'deepar-biketrain-with-categories-2018-12-21-04-05-44-478'

In [None]:
print ('job name: {0}'.format(job_name))

In [23]:
# Create an endpoint for real-time predictions
endpoint_name = sagemaker_session.endpoint_from_job(
    job_name=job_name,
    initial_instance_count=1,
    instance_type='ml.m5.xlarge',
    deployment_image=image_name,
    role=role
)

-------------!

In [24]:
print ('endpoint name: {0}'.format(endpoint_name))

endpoint name: deepar-biketrain-no-categories-2020-04-25-03-26-34-872


## Clean up the Project

In [26]:
# Don't forget to terminate the end point after completing the demo
# Otherwise, you account will accumulate hourly charges

# you can delete from sagemaker management console or through command line or throught code

sagemaker_session.delete_endpoint(endpoint_name)

ClientError: An error occurred (ValidationException) when calling the DeleteEndpoint operation: Could not find endpoint "arn:aws:sagemaker:us-east-2:399426528351:endpoint/deepar-biketrain-no-categories-2020-04-25-03-26-34-872".