In [94]:
import theano
from pylearn2.models import mlp
from pylearn2.training_algorithms import sgd, learning_rule
from pylearn2.termination_criteria import EpochCounter, MonitorBased
from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix
from pylearn2.train_extensions import best_params
from pylearn2.utils import serial
from sklearn.externals import joblib
import numpy as np
from random import randint
from keras.utils import np_utils
import itertools

In [41]:
# Shared pretty-printer used by later cells to dump attribute dictionaries;
# depth=6 caps how many levels of nested containers are printed.
from pprint import PrettyPrinter
pp = PrettyPrinter(depth=6)

In [3]:
# The feature and label arrays have to be wrapped in a DenseDesignMatrix.
class dataset(DenseDesignMatrix):
    """Design matrix that stores `features` as X and `labels` as y.

    On top of the pylearn2 base class it offers an ordered two-way split,
    the input width, len() and iteration over (features, label) pairs.
    """

    def __init__(self, features, labels):
        super(dataset, self).__init__(X=features, y=labels)

    def split(self, prop=.8):
        """Cut the rows into two datasets at the `prop` fraction.

        The first int(len(y) * prop) rows form the first dataset and the
        remaining rows form the second one. Rows keep their stored order;
        nothing is shuffled.
        """
        boundary = int(len(self.y) * prop)
        head = dataset(self.X[:boundary], self.y[:boundary])
        tail = dataset(self.X[boundary:], self.y[boundary:])
        return head, tail

    @property
    def nr_inputs(self):
        """Length of the first row of X (the number of input features)."""
        first_row = self.X[0]
        return len(first_row)

    def __len__(self):
        """Number of rows stored in X."""
        n_rows = self.X.shape[0]
        return n_rows

    def __iter__(self):
        """Yield (row of X, row of y) pairs; a shorter side is padded with None."""
        return itertools.izip_longest(self.X, self.y)

In [84]:
# Load the stored features and labels from the local ./mldata directory.
# NOTE(review): joblib.load unpickles the file contents, so these files must
# come from a trusted source.
# NOTE(review): "lables.mat" is kept exactly as written; presumably it matches
# the file name on disk -- confirm before correcting the spelling.
features = joblib.load("./mldata/features.mat")
labels = joblib.load("./mldata/lables.mat")

In [85]:
# One-hot encode the integer class labels into 9 columns, printing the first
# label before and after the conversion.
# The conversion is guarded by a dimensionality check because this cell
# rebinds `labels`: without the guard, running the cell a second time would
# feed the already encoded 2-D matrix back into to_categorical.
print(labels[0])
if np.ndim(labels) == 1:
    labels = np_utils.to_categorical(labels, 9)
print(labels[0])

6
[ 0.  0.  0.  0.  0.  0.  1.  0.  0.]


In [86]:
# Split in stored order (no shuffling): the first 60% of the rows become the
# training set and the remaining 40% are halved into validation and test sets.
ds_train, ds_holdout = dataset(features, labels).split(0.6)
ds_valid, ds_test = ds_holdout.split(0.5)

In [87]:
# Sanity check: rows in the training set, input width, then rows in the
# validation and test sets.
print(len(ds_train))
print(ds_train.nr_inputs)
print(len(ds_valid))
print(len(ds_test))

3600
784
1200
1200


In [92]:
# function for classifying an input vector
def classify(inp):
    """Return the index of the highest network output for a single sample.

    Parameters
    ----------
    inp : array-like
        One sample holding exactly `ds_train.nr_inputs` values.

    Returns
    -------
    The argmax over the output of the global network `ann` for that sample.

    Raises
    ------
    ValueError
        If `inp` cannot be arranged as a (1, ds_train.nr_inputs) row.
    """
    # reshape() hands back a new view (or a copy when a view is impossible)
    # instead of assigning to `.shape`, so an ndarray passed in by the caller
    # keeps its own shape and non-contiguous input is accepted as well.
    row = np.asarray(inp).reshape(1, ds_train.nr_inputs)
    return np.argmax(ann.fprop(theano.shared(row, name='inputs')).eval())
 
# function for calculating and printing the model's accuracy on a given dataset
def score(dataset):
    """Print how many samples of `dataset` are classified correctly.

    `dataset` must yield (features, label) pairs, where the argmax of the
    label is the expected class index, and must support len(). Inside this
    function the parameter name hides the `dataset` class.
    """
    hits = sum(
        1 for sample, target in dataset
        if classify(sample) == np.argmax(target)
    )
    print('%s/%s correct' % (hits, len(dataset)))

In [68]:
# add monitor for saving the model with best score; it is configured with the
# 'output_misclass' channel and the save path '/tmp/best.pkl'
# NOTE(review): the first training loop does not call this object and a later
# cell creates a fresh instance before the second training loop, so this
# particular instance appears to be unused.
monitor_save_best = best_params.MonitorBasedSaveBest('output_misclass', '/tmp/best.pkl')

In [71]:
# create a sigmoid hidden layer with 50 units, init weights in range -0.1 to 0.1
# and add a bias with value 1
hidden_layer = mlp.Sigmoid(layer_name='hidden', dim=50, irange=.1, init_bias=1.)

# create Softmax output layer with 9 outputs (one per class)
output_layer = mlp.Softmax(9, 'output', irange=.1)

# create Stochastic Gradient Descent trainer that runs for 100 epochs
trainer = sgd.SGD(learning_rate=.05, batch_size=50, termination_criterion=EpochCounter(100))

layers = [hidden_layer, output_layer]

# take the input width from the training data rather than hard-coding 784, so
# the network always matches the loaded feature matrix
ann = mlp.MLP(layers, nvis=ds_train.nr_inputs)
trainer.setup(ann, ds_train)

# train neural net until the termination criterion is true
# NOTE: this trainer is created without a monitoring dataset (the recorded run
# lists no monitored channels), so the best-model saver keyed on
# 'output_misclass' is not called in this loop.
while True:
    trainer.train(dataset=ds_train)
    ann.monitor.report_epoch()
    ann.monitor()
    if not trainer.continue_learning(ann):
        break

Parameter and initial learning rate summary:
	hidden_W: 0.05
	hidden_b: 0.05
	softmax_b: 0.05
	softmax_W: 0.05
Compiling sgd_update...
Compiling sgd_update done. Time elapsed: 0.403594 seconds
compiling begin_record_entry...
compiling begin_record_entry done. Time elapsed: 0.018859 seconds
Monitored channels: 
Compiling accum...
Compiling accum done. Time elapsed: 0.000799 seconds
Monitoring step:
	Epochs seen: 1
	Batches seen: 72
	Examples seen: 3600
Monitoring step:
	Epochs seen: 2
	Batches seen: 144
	Examples seen: 7200
Monitoring step:
	Epochs seen: 3
	Batches seen: 216
	Examples seen: 10800
Monitoring step:
	Epochs seen: 4
	Batches seen: 288
	Examples seen: 14400
Monitoring step:
	Epochs seen: 5
	Batches seen: 360
	Examples seen: 18000
Monitoring step:
	Epochs seen: 6
	Batches seen: 432
	Examples seen: 21600
Monitoring step:
	Epochs seen: 7
	Batches seen: 504
	Examples seen: 25200
Monitoring step:
	Epochs seen: 8
	Batches seen: 576
	Examples seen: 28800
Monitoring step:
	Epochs se

In [72]:
# Forward-propagate the first test sample, then print the index of the largest
# output followed by the sample's label row for comparison.
first_input = theano.shared(ds_test.X[0:1], name='inputs')
r = ann.fprop(first_input).eval()
print(r.argmax())
print(ds_test.y[0:1])

8
[[ 0.  0.  1.  0.  0.  0.  0.  0.  0.]]


In [64]:
# NOTE(review): %pprint is documented as a toggle; the output recorded for this
# cell reports that pretty printing was turned ON, despite the "off" argument.
%pprint off

Pretty printing has been turned ON


In [79]:
# Dump the instance attributes of the trained network.
pp.pprint(vars(ann))

{'_input_source': 'features',
 '_nested': False,
 '_target_source': 'targets',
 '_test_batch_size': 50,
 'batch_size': None,
 'extensions': [],
 'force_batch_size': None,
 'freeze_set': set([]),
 'input_space': VectorSpace(dim=784, dtype=float64),
 'layer_name': None,
 'layer_names': set(['hidden', 'output']),
 'layers': [<pylearn2.models.mlp.Sigmoid object at 0x10e5a0f90>,
            <pylearn2.models.mlp.Softmax object at 0x10e40f350>],
 'monitor': <pylearn2.monitor.Monitor object at 0x10955e490>,
 'monitor_targets': True,
 'names_to_del': set([]),
 'rng': <mtrand.RandomState object at 0x10e631990>,
 'seed': [2013, 1, 4]}


In [80]:
# Dump the instance attributes of the network's first layer.
pp.pprint(vars(ann.layers[0]))

{'W_lr_scale': None,
 'b': hidden_b,
 'b_lr_scale': None,
 'copy_input': None,
 'dim': 50,
 'extensions': [],
 'include_prob': 1.0,
 'init_bias': 1.0,
 'input_dim': 784,
 'input_space': VectorSpace(dim=784, dtype=float64),
 'irange': 0.1,
 'istdev': None,
 'layer_name': 'hidden',
 'mask_weights': None,
 'max_col_norm': None,
 'max_row_norm': None,
 'min_col_norm': None,
 'mlp': <pylearn2.models.mlp.MLP object at 0x10e41ed50>,
 'monitor_style': 'detection',
 'names_to_del': set([]),
 'output_space': VectorSpace(dim=50, dtype=float64),
 'requires_reformat': False,
 'sparse_init': None,
 'sparse_stdev': 1.0,
 'transformer': <pylearn2.linear.matrixmul.MatrixMul object at 0x10e4a0690>,
 'use_abs_loss': False,
 'use_bias': True}


In [90]:
# Try a different approach: two rectified-linear hidden layers (300 and 200
# units), momentum, a decaying learning rate and validation-based stopping.
# NOTE(review): the output recorded for this cell shows hidden1/hidden2 column
# norms around 1e30 and hidden1 activation statistics of 0.0 by the second
# epoch, which suggests training diverged with these settings. Consider a
# smaller learning rate, lower final momentum or a max_col_norm constraint.
hidden1_layer = mlp.RectifiedLinear(layer_name='hidden1', dim=300, irange=.1, init_bias=1.)
hidden2_layer = mlp.RectifiedLinear(layer_name='hidden2', dim=200, irange=.1, init_bias=1.)

# create Softmax output layer with 9 outputs
output_layer = mlp.Softmax(9, 'output', irange=.1)

layers = [hidden1_layer, hidden2_layer, output_layer]

# termination criterion that stops after 50 epochs without
# any decrease in misclassification on the validation set
termination_criterion = MonitorBased(channel_name='output_misclass', N=50, prop_decrease=0.0)

# momentum: starts at 0.5; the adjustor receives the final value 0.99 and the
# epoch window 1..50 (presumably the span over which momentum is ramped up --
# confirm against the pylearn2 documentation)
initial_momentum = .5
final_momentum = .99
start = 1
saturate = 50
momentum_adjustor = learning_rule.MomentumAdjustor(final_momentum, start, saturate)
momentum_rule = learning_rule.Momentum(initial_momentum)

# learning rate: same epoch window 1..50 with a decay factor of 0.1
# (start and saturate are re-assigned to the same values as above)
start = 1
saturate = 50
decay_factor = .1
learning_rate_adjustor = sgd.LinearDecayOverEpoch(start, saturate, decay_factor)

# create neural net; the input width is read from the training matrix
ann = mlp.MLP(layers, nvis=ds_train.X.shape[-1])

# create Stochastic Gradient Descent trainer that monitors the validation set
trainer = sgd.SGD(learning_rate=.05, batch_size=10, monitoring_dataset=ds_valid,
                  termination_criterion=termination_criterion, learning_rule=momentum_rule)
trainer.setup(ann, ds_train)

# add monitor for saving the model with best score
monitor_save_best = best_params.MonitorBasedSaveBest('output_misclass', '/tmp/best.pkl')

# train neural net until the termination criterion is true
while True:
    trainer.train(dataset=ds_train)
    ann.monitor.report_epoch()
    ann.monitor()
    # give the best-model saver a chance to run before the stop check
    monitor_save_best.on_monitor(ann, ds_valid, trainer)
    if not trainer.continue_learning(ann):
        break
    # momentum and learning rate are only adjusted when training continues
    momentum_adjustor.on_monitor(ann, ds_valid, trainer)
    learning_rate_adjustor.on_monitor(ann, ds_valid, trainer)

Parameter and initial learning rate summary:
	hidden1_W: 0.05
	hidden1_b: 0.05
	hidden2_W: 0.05
	hidden2_b: 0.05
	softmax_b: 0.05
	softmax_W: 0.05
Compiling sgd_update...
Compiling sgd_update done. Time elapsed: 0.566007 seconds
compiling begin_record_entry...
compiling begin_record_entry done. Time elapsed: 0.134696 seconds
Monitored channels: 
	hidden1_col_norms_max
	hidden1_col_norms_mean
	hidden1_col_norms_min
	hidden1_max_x_max_u
	hidden1_max_x_mean_u
	hidden1_max_x_min_u
	hidden1_mean_x_max_u
	hidden1_mean_x_mean_u
	hidden1_mean_x_min_u
	hidden1_min_x_max_u
	hidden1_min_x_mean_u
	hidden1_min_x_min_u
	hidden1_range_x_max_u
	hidden1_range_x_mean_u
	hidden1_range_x_min_u
	hidden1_row_norms_max
	hidden1_row_norms_mean
	hidden1_row_norms_min
	hidden2_col_norms_max
	hidden2_col_norms_mean
	hidden2_col_norms_min
	hidden2_max_x_max_u
	hidden2_max_x_mean_u
	hidden2_max_x_min_u
	hidden2_mean_x_max_u
	hidden2_mean_x_mean_u
	hidden2_mean_x_min_u
	hidden2_min_x_max_u
	hidden2_min_x_mean_u
	hi

  'indicating yaml_src')


Monitoring step:
	Epochs seen: 2
	Batches seen: 720
	Examples seen: 7200
	hidden1_col_norms_max: 1.648e+30
	hidden1_col_norms_mean: 8.398e+27
	hidden1_col_norms_min: 1.5586597894
	hidden1_max_x_max_u: 0.0
	hidden1_max_x_mean_u: 0.0
	hidden1_max_x_min_u: 0.0
	hidden1_mean_x_max_u: 0.0
	hidden1_mean_x_mean_u: 0.0
	hidden1_mean_x_min_u: 0.0
	hidden1_min_x_max_u: 0.0
	hidden1_min_x_mean_u: 0.0
	hidden1_min_x_min_u: 0.0
	hidden1_range_x_max_u: 0.0
	hidden1_range_x_mean_u: 0.0
	hidden1_range_x_min_u: 0.0
	hidden1_row_norms_max: 9.448e+28
	hidden1_row_norms_mean: 4.809e+28
	hidden1_row_norms_min: 0.965449489813
	hidden2_col_norms_max: 1.405e+33
	hidden2_col_norms_mean: 7.047e+30
	hidden2_col_norms_min: 5.03228082988
	hidden2_max_x_max_u: 0.251338526929
	hidden2_max_x_mean_u: 0.00125669263464
	hidden2_max_x_min_u: 0.0
	hidden2_mean_x_max_u: 0.251338526929
	hidden2_mean_x_mean_u: 0.00125669263464
	hidden2_mean_x_min_u: 0.0
	hidden2_min_x_max_u: 0.251338526929
	hidden2_min_x_mean_u: 0.0012566926

In [91]:
# Run the current network on the first test sample and print the index of its
# largest output next to that sample's label row.
first_input = theano.shared(ds_test.X[0:1], name='inputs')
r = ann.fprop(first_input).eval()
print(r.argmax())
print(ds_test.y[0:1])

1
[[ 0.  0.  1.  0.  0.  0.  0.  0.  0.]]


In [None]:
# Report the number of correctly classified samples for each of the three splits.
for heading, subset in (
    ('Accuracy of train set:', ds_train),
    ('Accuracy of validation set:', ds_valid),
    ('Accuracy of test set:', ds_test),
):
    print(heading)
    score(subset)