In [30]:
import theano
from pylearn2.models import mlp
from pylearn2.training_algorithms import sgd, learning_rule
from pylearn2.termination_criteria import EpochCounter, MonitorBased
from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix
from pylearn2.train_extensions import best_params
from pylearn2.utils import serial
from sklearn.externals import joblib
import numpy as np
from random import randint
from keras.utils import np_utils
import itertools

In [2]:
from pprint import PrettyPrinter
pp = PrettyPrinter(depth=6)

In [3]:
# features, lables를 DenseMatrix에 넣어야함
class dataset(DenseDesignMatrix):
    def __init__(self, features, labels):
        super(dataset, self).__init__(X=features, y=labels)
        
    def split(self, prop=.8):
        cutoff = int(len(self.y) * prop)
        X1, X2 = self.X[:cutoff], self.X[cutoff:]
        y1, y2 = self.y[:cutoff], self.y[cutoff:]
        return dataset(X1, y1), dataset(X2, y2)
    
    @property
    def nr_inputs(self):
        return len(self.X[0])
 
    def __len__(self):
        return self.X.shape[0]
 
    def __iter__(self):
        return itertools.izip_longest(self.X, self.y)

In [4]:
features = joblib.load("./mldata/features.mat")
features = features.astype("float32")
features /= 255.0
labels = joblib.load("./mldata/lables.mat")
labels = np_utils.to_categorical(labels, 9)

In [5]:
ds_train = dataset(features, labels)
ds_train, ds_valid = ds_train.split(0.6)
ds_valid, ds_test = ds_valid.split(0.5)

In [6]:
print ds_train.X[0].shape
print ds_train.y[0]

(784,)
[ 0.  0.  0.  0.  0.  0.  1.  0.  0.]


In [24]:
hidden_layer = mlp.Tanh(layer_name='hidden', dim=20, irange=.1, init_bias=1.)
output_layer = mlp.Softmax(9, 'output', irange=.1)
layers = [hidden_layer, output_layer]

# termination criterion that stops after 50 epochs without
# any increase in misclassification on the validation set
termination_criterion = MonitorBased(channel_name='output_misclass', N=2, prop_decrease=0.0)

# momentum
initial_momentum = .5
final_momentum = .99
start = 1
saturate = 4
momentum_adjustor = learning_rule.MomentumAdjustor(final_momentum, start, saturate)
momentum_rule = learning_rule.Momentum(initial_momentum)

# learning rate
start = 1
saturate = 4
decay_factor = .1
learning_rate_adjustor = sgd.LinearDecayOverEpoch(start, saturate, decay_factor)

# trainer = sgd.SGD(learning_rate=.05, batch_size=50, termination_criterion=EpochCounter(2))
trainer = sgd.SGD(learning_rate=.05, batch_size=50, monitoring_dataset=ds_valid,
                  termination_criterion=termination_criterion, learning_rule=momentum_rule)
layers = [hidden_layer, output_layer]


ann = mlp.MLP(layers, nvis=784)
trainer.setup(ann, ds_train)

Parameter and initial learning rate summary:
	hidden_W: 0.05
	hidden_b: 0.05
	softmax_b: 0.05
	softmax_W: 0.05
Compiling sgd_update...
Compiling sgd_update done. Time elapsed: 0.409526 seconds


In [25]:
monitor_save_best = best_params.MonitorBasedSaveBest('output_misclass', '/tmp/best.pkl')

while True:
    trainer.train(dataset=ds_train)
    ann.monitor()
    monitor_save_best.on_monitor(ann, ds_valid, trainer)
    if not trainer.continue_learning(ann):
        break
    momentum_adjustor.on_monitor(ann, ds_valid, trainer)
    learning_rate_adjustor.on_monitor(ann, ds_valid, trainer)

compiling begin_record_entry...
compiling begin_record_entry done. Time elapsed: 0.264366 seconds
Monitored channels: 
	hidden_col_norms_max
	hidden_col_norms_mean
	hidden_col_norms_min
	hidden_max_x_max_u
	hidden_max_x_mean_u
	hidden_max_x_min_u
	hidden_mean_x_max_u
	hidden_mean_x_mean_u
	hidden_mean_x_min_u
	hidden_min_x_max_u
	hidden_min_x_mean_u
	hidden_min_x_min_u
	hidden_range_x_max_u
	hidden_range_x_mean_u
	hidden_range_x_min_u
	hidden_row_norms_max
	hidden_row_norms_mean
	hidden_row_norms_min
	learning_rate
	momentum
	objective
	output_col_norms_max
	output_col_norms_mean
	output_col_norms_min
	output_max_max_class
	output_mean_max_class
	output_min_max_class
	output_misclass
	output_nll
	output_row_norms_max
	output_row_norms_mean
	output_row_norms_min
Compiling accum...
graph size: 115
Compiling accum done. Time elapsed: 1.271711 seconds
Monitoring step:
	Epochs seen: 0
	Batches seen: 72
	Examples seen: 3600
	hidden_col_norms_max: 1.73307880083
	hidden_col_norms_mean: 1.65759

  'indicating yaml_src')


Monitoring step:
	Epochs seen: 0
	Batches seen: 144
	Examples seen: 7200
	hidden_col_norms_max: 1.75770359376
	hidden_col_norms_mean: 1.6791520007
	hidden_col_norms_min: 1.58899865794
	hidden_max_x_max_u: 0.999938961429
	hidden_max_x_mean_u: 0.987281005939
	hidden_max_x_min_u: 0.901720543399
	hidden_mean_x_max_u: 0.995046261364
	hidden_mean_x_mean_u: 0.23844876583
	hidden_mean_x_min_u: -0.357851397849
	hidden_min_x_max_u: 0.899378592621
	hidden_min_x_mean_u: -0.700460355438
	hidden_min_x_min_u: -0.99984028989
	hidden_range_x_max_u: 1.99711322869
	hidden_range_x_mean_u: 1.68774136138
	hidden_range_x_min_u: 0.100059509355
	hidden_row_norms_max: 0.354238565915
	hidden_row_norms_mean: 0.26660138633
	hidden_row_norms_min: 0.169732484124
	learning_rate: 0.03875
	momentum: 0.5
	objective: 0.405878823986
	output_col_norms_max: 1.37480093028
	output_col_norms_mean: 1.19312306039
	output_col_norms_min: 0.875839548659
	output_max_max_class: 0.94240072391
	output_mean_max_class: 0.821162151159
	ou