In [7]:
import boto3
import os
import pandas as pd
import numpy as np
from sagemaker.mxnet import MXNet
from sagemaker import get_execution_role

In [8]:
# Notebook configuration: S3 destination, local data paths, and the IAM role
# passed to the SageMaker estimator.
bucket = 'eduthie-sagemaker-1'
prefix = 'lstnet'

# Local directory holding the raw data set and the generated train/test CSVs.
data_dir = 'data'
data_file_path = os.path.join(data_dir,'electricity.txt')

# Key fragments appended to `prefix` when building S3 object keys / URIs.
test_bucket_prefix = '/test/'
single_host_train_bucket_prefix = '/train/single_host/'

# The role is resolved last so the plain constants above are always defined,
# even when role lookup fails.
# get_execution_role() raises ValueError when the current AWS identity is an
# IAM user rather than a role (e.g. when running outside a SageMaker notebook
# instance). In that case fall back to a role ARN supplied through the
# SAGEMAKER_ROLE_ARN environment variable (a convention of this notebook);
# if it is not set, the original error is re-raised unchanged.
try:
    role = get_execution_role()
except ValueError:
    role = os.environ.get('SAGEMAKER_ROLE_ARN')
    if not role:
        raise

ValueError: The current AWS identity is not a role: arn:aws:iam::987551451182:user/eduthie, therefore it cannot be used as a SageMaker execution role

In [3]:
# Load the raw data (the file has no header row) and scale every column by
# its own maximum value. Summary statistics are printed before and after.
df = pd.read_csv(data_file_path,header=None)
print(df.describe())
max_columns = df.max().astype(np.float64)
# A column whose maximum is 0 would otherwise produce NaN (0/0) for an
# all-zero column; dividing by 1 instead leaves such a column unchanged.
max_columns = max_columns.replace(0.0, 1.0)
df = df/max_columns # normalize
print(df.describe())

                0             1             2             3             4    \
count  26304.000000  26304.000000  26304.000000  26304.000000  26304.000000   
mean      23.263762    112.885569     16.821624    440.335196    200.536724   
std       24.127164     25.553141     49.190377    152.601050     69.727348   
min        0.000000      0.000000      0.000000      0.000000      0.000000   
25%        9.000000     95.000000      8.000000    334.750000    148.000000   
50%       11.000000    113.000000      8.000000    400.000000    190.000000   
75%       24.000000    130.000000     10.000000    511.000000    239.000000   
max      140.000000    296.000000    601.000000   1170.000000    547.000000   

                5             6             7             8             9    \
count  26304.000000  26304.000000  26304.000000  26304.000000  26304.000000   
mean     755.035166     25.943659   1022.549270    214.867321    227.069267   
std      249.598861     27.392344    231.480251    

In [5]:
# Split the rows in order (no shuffling): the first `train_frac` of the rows
# become the training set and the remainder the test set.
train_frac = 0.8

num_time_steps = len(df)
split_index = int(num_time_steps*train_frac)
train, test = df.iloc[:split_index], df.iloc[split_index:]
print('Training size {}'.format(len(train)))
print('Test size {}'.format(len(test)))

# Write both splits as header-less, index-less CSV files under `data_dir`.
test_file_path = os.path.join(data_dir,'test.csv')
train_file_path = os.path.join(data_dir,'train.csv')
test.to_csv(test_file_path, header=None, index=False)
train.to_csv(train_file_path, header=None, index=False)

# Upload the two files to their S3 keys beneath `prefix`.
test_key = prefix + test_bucket_prefix + 'test.csv'
train_key = prefix + single_host_train_bucket_prefix + 'train.csv'
client = boto3.client('s3')
client.upload_file(test_file_path, bucket, test_key)
client.upload_file(train_file_path, bucket, train_key)

Training size 21043
Test size 5261


In [6]:
# Hyperparameters handed to the training entry point via the estimator.
# The original dict listed 'batch_size' twice (64, then 512); Python keeps
# only the last value for a repeated key, so the effective value was 512.
# The dead first entry is removed so the dict states what is actually used.
hyperparameters = {
    'conv_hid' : 100,
    'gru_hid' : 100,
    'skip_gru_hid' : 5,
    'skip' : 24,
    'ar_window' : 24,
    'window' : 24*7,  # 168; presumably one week of hourly samples - TODO confirm
    'horizon' : 24,
    'learning_rate' : 0.001,
    'clip_gradient' : 10.,
    'batch_size' : 512,
    'epochs' : 100
}

In [None]:
# S3 locations: where the job writes its output, and the two input channels
# whose objects were uploaded under these same prefixes.
output_location = 's3://{}/{}/output'.format(bucket, prefix)
train_channel = 's3://{}/{}{}'.format(bucket, prefix, single_host_train_bucket_prefix)
test_channel = 's3://{}/{}{}'.format(bucket, prefix, test_bucket_prefix)

# Single-instance MXNet estimator running src/lstnet_sagemaker.py.
lstnet1 = MXNet(
    entry_point='lstnet_sagemaker.py',
    source_dir='src',
    role=role,
    output_path=output_location,
    train_instance_count=1,
    train_instance_type='ml.p2.xlarge',
    hyperparameters=hyperparameters,
)

# wait=False: the call is made without waiting on the training job.
lstnet1.fit(inputs={'train': train_channel, 'test': test_channel}, wait=False)