In [59]:
#imports

import numpy as np
import mxnet as mx
from mxnet import nd, gluon, autograd
import pandas as pd

from sagemaker.mxnet import MXNet
import sagemaker
from sagemaker import get_execution_role

In [61]:
sagemaker_session = sagemaker.Session()
role = get_execution_role()
role


'arn:aws:iam::475933981307:role/service-role/AmazonSageMaker-ExecutionRole-20180102T172706'

In [62]:
data_files = {'train_data': 'data/train_data.npy', 
             'train_label': 'data/train_label.npy',
             'val_data': 'data/val_data.npy',
             'val_label': 'data/val_label.npy'
            }

hyper_parameters = {'batch_size': 100,
                   'learning_rate': 0.01,
                   'num_epochs': 1,
                   'optimizer': 'sgd',
                   'momentum': 0.9,
                   'num_dims': 30}

train_ctx = mx.cpu()
num_examples = 10000
num_input = 30
num_outputs = 2


In [None]:
inputs = 

In [54]:
!cat linear_model_gluon.py

from __future__ import print_function

import logging
import mxnet as mx
from mxnet import gluon, autograd,nd
from mxnet.gluon import nn
import numpy as np
import json
import time
from mxnet.gluon.data.vision import MNIST
import boto3
import gzip,struct


logging.basicConfig(level=logging.DEBUG)

# ------------------------------------------------------------ #
# Training methods                                             #
# ------------------------------------------------------------ #


def train(channel_input_dirs, hyperparameters, hosts, num_gpus, **kwargs):
    # SageMaker passes num_cpus, num_gpus and other args we can use to tailor training to
    # the current container environment, but here we just use simple cpu context.
    ctx = mx.gpu() if num_gpus > 0 else mx.cpu()

    # retrieve the hyperparameters we set in notebook (with some defaults)
    batch_size = hyperparameters.get('batch_size', 100)
    epochs = hyperparameters.get('num_epochs', 10

In [64]:
m = MXNet('linear_model_gluon.py', 
          role=role,
          train_instance_count=1,
          train_instance_type="ml.c4.xlarge",
          hyperparameters=hyper_parameters)

In [65]:
m.fit()

TypeError: fit() missing 1 required positional argument: 'inputs'

In [32]:
def load_data(data_files):
    train_data = np.load(data_files['train_data']).astype(np.float32)
    train_label = np.load(data_files['train_label']).astype(np.float32)
    val_data = np.load(data_files['val_data']).astype(np.float32)
    val_label = np.load(data_files['val_label']).astype(np.float32)
    return (train_data, train_label), (val_data, val_label)                      

In [33]:
def get_train_data(data,batch_size):
    return gluon.data.DataLoader(gluon.data.ArrayDataset(data[0], data[1]), 
                                        shuffle=True, batch_size=batch_size)

In [34]:
def get_eval_data(data, batch_size):
    return gluon.data.DataLoader(gluon.data.ArrayDataset(data[0], data[1]), 
                                        shuffle=False, batch_size=batch_size)

train_data = np.load('data/train_data.npy').astype(np.float32)
train_label = np.load('data/train_label.npy').astype(np.float32)

val_data = np.load('data/val_data.npy').astype(np.float32)
val_label = np.load('data/val_label.npy').astype(np.float32)

print("training data shape= {}; training label shape = {} \nValidation data shape= {}; validation label shape = {}".format(train_data.shape, 
                                                                        train_label.shape,
                                                                        val_data.shape,
                                                                        val_label.shape))
train_set = (train_data, train_label)
test_set = (val_data, val_label)

In [35]:
train_set, test_set = load_data(data_files)



train_data_iter = get_train_data(train_set, batch_size)


test_data_iter = get_eval_data(test_set, batch_size)


In [36]:
print(train_set[0].shape, train_set[1].shape, test_set[0].shape, test_set[1].shape)

(170884, 30) (170884,) (113923, 30) (113923,)


In [37]:
net = gluon.nn.Sequential()
with net.name_scope():
    net.add(gluon.nn.Dense(num_outputs, in_units=num_input))

In [38]:
net.collect_params()

sequential2_ (
  Parameter sequential2_dense0_weight (shape=(2, 30), dtype=<class 'numpy.float32'>)
  Parameter sequential2_dense0_bias (shape=(2,), dtype=<class 'numpy.float32'>)
)

In [39]:
net.collect_params().initialize(mx.init.Normal(sigma=1.), ctx=train_ctx)

In [40]:
loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()

In [41]:
trainer = gluon.Trainer(params=net.collect_params(), 
                            optimizer=hyper_parameters.get('optimizer', 'sgd'), 
                            optimizer_params={'learning_rate':hyper_parameters.get('learning_rate', 0.01)})

In [50]:
  
def evaluate_accuracy(net, data_iterator, ctx):
    metric = mx.metric.RMSE()
    for i, (data, label) in enumerate(data_iterator):
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        output = net(data)
        predictions = nd.argmax(output, axis=1)
        metric.update(preds=predictions, labels=label)
    return metric.get()


In [51]:
tt = evaluate_accuracy(net, test_data_iter, train_ctx)


In [52]:
tt

('rmse', 0.0016063481474329151)

In [43]:
batch_size = hyper_parameters.get('batch_size', 100)
loss_sequence = []
num_batches = num_examples / batch_size

for e in range(hyper_parameters.get('num_epochs', 10)):
    cumulative_loss = 0
    # inner loop
    for i, (data, label) in enumerate(train_data_iter):
        data = data.as_in_context(train_ctx)
        label = label.as_in_context(train_ctx)
        with autograd.record():
            output = net(data)
            loss = loss_fn(output, label)
        loss.backward()
        trainer.step(batch_size)
        cumulative_loss += nd.mean(loss).asscalar()
    
    net.save_params('model/l_r_'+str(e)+'.params')
    
    test_accuracy = evaluate_accuracy(test_data_iter, net)
    train_accuracy = evaluate_accuracy(train_data_iter, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
          (e, cumulative_loss/num_examples, train_accuracy, test_accuracy))

Epoch 0. Loss: 25136.616999, Train_acc 0.00182579995787, Test_acc 0.00160634814743


In [None]:
def get_train_data(train_data,batch_size):
    return gluon.data.DataLoader(gluon.data.ArrayDataset(train_set[0], train_set[1]), 
                                        shuffle=True, batch_size=batch_size)