In [13]:
# https://gist.github.com/arngarden/9747287
import theano
from pylearn2.models import mlp
from pylearn2.train_extensions import best_params
from pylearn2.training_algorithms import sgd, learning_rule
from pylearn2.utils import serial
from pylearn2.termination_criteria import MonitorBased
from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix
from sklearn.preprocessing import StandardScaler
import numpy as np
from random import randint
import itertools
import os

In [14]:
# Path (relative to the working directory) of the Pima Indians diabetes data
# file. Pima.__init__ reads it as one comma-separated record per line, with
# the class label in the last field.
PIMA_DATASET = "./pima-indians-diabetes.data.txt"

In [15]:
# Module-level scaler; Pima.__init__ fits it when the data is loaded from disk.
scaler = StandardScaler()


class Pima(DenseDesignMatrix):
    """Pima Indians diabetes data wrapped as a pylearn2 DenseDesignMatrix.

    Called without arguments, the constructor parses ``PIMA_DATASET`` (one
    comma-separated record per line, class label in the last field),
    standardises the features with the module-level ``scaler`` and one-hot
    encodes the label as ``[1, 0]`` for label 0 and ``[0, 1]`` otherwise.

    Called with ``X`` and ``y``, it wraps the given arrays unchanged; this is
    how ``split`` builds its two parts.
    """

    def __init__(self, X=None, y=None):
        """Load the dataset from disk, or wrap already-prepared arrays.

        Parameters
        ----------
        X : array-like or None
            Feature matrix. When None, features and labels are read from
            ``PIMA_DATASET`` and ``y`` is ignored.
        y : array-like or None
            One-hot label matrix matching ``X`` row for row.
        """
        if X is None:
            X = []
            y = []
            with open(PIMA_DATASET) as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        # Tolerate blank lines (e.g. an empty last line)
                        # instead of failing when unpacking rsplit().
                        continue
                    features, label = line.rsplit(',', 1)
                    # A comprehension yields a real list on Python 2 and 3;
                    # map() would hand np.asarray an iterator on Python 3.
                    X.append([float(value) for value in features.split(',')])
                    y.append([1, 0] if int(label) == 0 else [0, 1])
            X = np.asarray(X)
            # NOTE(review): the scaler is fitted on every row of the file.
            # In this notebook split() is only called afterwards, so the
            # scaling statistics also include rows that end up in the
            # validation and test sets.
            X = scaler.fit_transform(X)
            y = np.asarray(y)
        super(Pima, self).__init__(X=X, y=y)

    @property
    def nr_inputs(self):
        """Number of feature columns in ``self.X``."""
        # shape[1] also works when the matrix has zero rows, where
        # len(self.X[0]) would raise IndexError.
        return self.X.shape[1]

    def split(self, prop=.8):
        """Split row-wise into two Pima datasets, without shuffling.

        The first dataset holds the first ``int(len(self.y) * prop)`` rows,
        the second holds the remaining rows.
        """
        cutoff = int(len(self.y) * prop)
        X1, X2 = self.X[:cutoff], self.X[cutoff:]
        y1, y2 = self.y[:cutoff], self.y[cutoff:]
        return Pima(X1, y1), Pima(X2, y2)

    def __len__(self):
        """Number of examples (rows of ``self.X``)."""
        return self.X.shape[0]

    def __iter__(self):
        """Iterate over ``(features, label)`` pairs, one per example."""
        return itertools.izip_longest(self.X, self.y)

In [16]:
# create datasets: the first 70% of the rows are used for training; the
# remaining rows are split 70/30 again into validation and test sets.
# Each stage gets its own name so that ds_train / ds_valid never hold anything
# other than the final training / validation sets.
ds_all = Pima()
ds_train, ds_holdout = ds_all.split(0.7)
ds_valid, ds_test = ds_holdout.split(0.7)

In [17]:
# hidden layer: 20 sigmoid units, weights initialised in the range
# -0.05 to 0.05, biases initialised to 1
hidden_layer = mlp.Sigmoid(
    dim=20,
    layer_name='hidden',
    irange=.05,
    init_bias=1.,
)
# output layer: softmax over the 2 classes
output_layer = mlp.Softmax(
    n_classes=2,
    layer_name='output',
    irange=.05,
)
layers = [hidden_layer, output_layer]

In [18]:
# termination criterion: stop once the monitored 'output_misclass' channel
# (misclassification on the monitoring dataset) has gone N=50 epochs without
# decreasing; prop_decrease=0.0 means any decrease at all counts as progress
termination_criterion = MonitorBased(channel_name='output_misclass',
                                     N=50, prop_decrease=0.0)

In [19]:
# momentum: training starts with momentum 0.5; the adjustor is configured
# with a final momentum of 0.99 and the epochs `start` / `saturate`
initial_momentum = .5
final_momentum = .99
start = 1
saturate = 50
momentum_rule = learning_rule.Momentum(init_momentum=initial_momentum)
momentum_adjustor = learning_rule.MomentumAdjustor(
    final_momentum=final_momentum,
    start=start,
    saturate=saturate,
)

In [20]:
# learning rate schedule: linear decay configured with the epochs
# `start` / `saturate` and a decay factor of 0.1
start = 1
saturate = 50
decay_factor = .1
learning_rate_adjustor = sgd.LinearDecayOverEpoch(
    start=start,
    saturate=saturate,
    decay_factor=decay_factor,
)

In [21]:
# build the network from the two layers; the number of visible (input)
# units equals the number of feature columns in the training set
ann = mlp.MLP(layers=layers, nvis=ds_train.nr_inputs)

In [22]:
# display the number of input features (the value passed as `nvis` to the MLP)
ds_train.nr_inputs

8

In [23]:
# create the Stochastic Gradient Descent trainer (monitored on the validation
# set) and set it up for the network and the training data
trainer = sgd.SGD(
    learning_rate=.05,
    batch_size=10,
    monitoring_dataset=ds_valid,
    termination_criterion=termination_criterion,
    learning_rule=momentum_rule,
)
trainer.setup(ann, ds_train)

Parameter and initial learning rate summary:
	hidden_W: 0.05
	hidden_b: 0.05
	softmax_b: 0.05
	softmax_W: 0.05
Compiling sgd_update...
Compiling sgd_update done. Time elapsed: 1.844264 seconds


In [24]:
print ds_train.X.shape
print ds_train.y.shape
print ds_train.y[0]

(537, 8)
(537, 2)
[0 1]


In [25]:
# extension that saves the model to disk whenever the monitored
# 'output_misclass' channel reaches a new best value
monitor_save_best = best_params.MonitorBasedSaveBest(
    channel_name='output_misclass',
    save_path='/tmp/best.pkl',
)

In [26]:
# train the network one epoch at a time until the termination criterion fires
keep_training = True
while keep_training:
    # one epoch of SGD, then record the monitoring channels for that epoch
    trainer.train(dataset=ds_train)
    ann.monitor.report_epoch()
    ann.monitor()
    # let the save-best extension see the fresh monitoring values first
    monitor_save_best.on_monitor(ann, ds_valid, trainer)
    keep_training = trainer.continue_learning(ann)
    if keep_training:
        # momentum and learning rate are only adjusted when another epoch follows
        for adjustor in (momentum_adjustor, learning_rate_adjustor):
            adjustor.on_monitor(ann, ds_valid, trainer)

compiling begin_record_entry...
compiling begin_record_entry done. Time elapsed: 0.118892 seconds
Monitored channels: 
	hidden_col_norms_max
	hidden_col_norms_mean
	hidden_col_norms_min
	hidden_max_x_max_u
	hidden_max_x_mean_u
	hidden_max_x_min_u
	hidden_mean_x_max_u
	hidden_mean_x_mean_u
	hidden_mean_x_min_u
	hidden_min_x_max_u
	hidden_min_x_mean_u
	hidden_min_x_min_u
	hidden_range_x_max_u
	hidden_range_x_mean_u
	hidden_range_x_min_u
	hidden_row_norms_max
	hidden_row_norms_mean
	hidden_row_norms_min
	learning_rate
	momentum
	objective
	output_col_norms_max
	output_col_norms_mean
	output_col_norms_min
	output_max_max_class
	output_mean_max_class
	output_min_max_class
	output_misclass
	output_nll
	output_row_norms_max
	output_row_norms_mean
	output_row_norms_min
Compiling accum...
graph size: 115
Compiling accum done. Time elapsed: 1.673217 seconds
Monitoring step:
	Epochs seen: 1
	Batches seen: 54
	Examples seen: 537
	hidden_col_norms_max: 0.109786641873
	hidden_col_norms_mean: 0.08451

  'indicating yaml_src')


Monitoring step:
	Epochs seen: 2
	Batches seen: 108
	Examples seen: 1074
	hidden_col_norms_max: 0.132866632896
	hidden_col_norms_mean: 0.09400848252
	hidden_col_norms_min: 0.0593944818248
	hidden_max_x_max_u: 0.778959390726
	hidden_max_x_mean_u: 0.756450705718
	hidden_max_x_min_u: 0.739225983286
	hidden_mean_x_max_u: 0.741111638254
	hidden_mean_x_mean_u: 0.729807697106
	hidden_mean_x_min_u: 0.716512105612
	hidden_min_x_max_u: 0.7226511796
	hidden_min_x_mean_u: 0.700208740093
	hidden_min_x_min_u: 0.668049267803
	hidden_range_x_max_u: 0.102604805442
	hidden_range_x_mean_u: 0.0562419656251
	hidden_range_x_min_u: 0.0256893368843
	hidden_row_norms_max: 0.187597788857
	hidden_row_norms_mean: 0.149789582081
	hidden_row_norms_min: 0.119085906015
	learning_rate: 0.0491
	momentum: 0.5
	objective: 0.645925166629
	output_col_norms_max: 0.25827756524
	output_col_norms_mean: 0.247282976762
	output_col_norms_min: 0.236288388283
	output_max_max_class: 0.780283204956
	output_mean_max_class: 0.773584053

In [27]:
# load the best model (the path matches the one given to MonitorBasedSaveBest);
# this rebinds `ann`, replacing the network object that was just trained
# NOTE(review): the '.pkl' extension suggests this unpickles the file, and
# unpickling can execute arbitrary code. /tmp is commonly writable by other
# users, so only load a file this notebook wrote itself -- TODO confirm
ann = serial.load('/tmp/best.pkl')

In [28]:
# function for classifying an input vector
def classify(inp):
    """Return the index of the largest network output for one feature vector.

    Parameters
    ----------
    inp : array-like
        A single example with ``ds_train.nr_inputs`` values.

    Returns
    -------
    The ``np.argmax`` of the output of the global ``ann`` for this example.

    Raises
    ------
    ValueError
        If ``inp`` cannot be reshaped to ``(1, ds_train.nr_inputs)``.
    """
    # reshape() returns a new array object. Assigning to ``inp.shape`` would
    # reshape the caller's own array in place whenever ``inp`` is already an
    # ndarray (np.asarray returns such input unchanged).
    row = np.asarray(inp).reshape(1, ds_train.nr_inputs)
    # NOTE(review): a fresh shared variable is created and evaluated on every
    # call, which is presumably slow when called in a loop -- TODO confirm
    return np.argmax(ann.fprop(theano.shared(row, name='inputs')).eval())
 
# function for calculating and printing the models accuracy on a given dataset
def score(dataset):
    nr_correct = 0
    for features, label in dataset:
        if classify(features) == np.argmax(label):
            nr_correct += 1
    print '%s/%s correct' % (nr_correct, len(dataset))

In [29]:
print
print 'Accuracy of train set:'
score(ds_train)
print 'Accuracy of validation set:'
score(ds_valid)
print 'Accuracy of test set:'
score(ds_test)


Accuracy of train set:
398/537 correct
Accuracy of validation set:
136/161 correct
Accuracy of test set:
52/70 correct
