In [39]:
import theano
from pylearn2.models import mlp
from pylearn2.training_algorithms import sgd, learning_rule
from pylearn2.termination_criteria import EpochCounter, MonitorBased
from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix
from pylearn2.train_extensions import best_params
from pylearn2.utils import serial
import numpy as np
from random import randint
import itertools

In [2]:
from pprint import PrettyPrinter
pp = PrettyPrinter(depth=6)

In [4]:
features = [[randint(0, 1), randint(0, 1)] for _ in range(1000)]
labels = []

for a, b in features:
    if a + b == 1:
        labels.append([0, 1])
    else:
        labels.append([1, 0])
features = np.array(features)
labels = np.array(labels)

In [5]:
print features.shape
print features[0]
print labels.shape
print labels[0]

(1000, 2)
[1 1]
(1000, 2)
[1 0]


In [36]:
# features, lables를 DenseMatrix에 넣어야함
class XOR(DenseDesignMatrix):
    def __init__(self, features, labels):
        super(XOR, self).__init__(X=features, y=labels)
        
    def split(self, prop=.8):
        cutoff = int(len(self.y) * prop)
        X1, X2 = self.X[:cutoff], self.X[cutoff:]
        y1, y2 = self.y[:cutoff], self.y[cutoff:]
        return XOR(X1, y1), XOR(X2, y2)
    
    @property
    def nr_inputs(self):
        return len(self.X[0])
 
    def __len__(self):
        return self.X.shape[0]
 
    def __iter__(self):
        return itertools.izip_longest(self.X, self.y)

In [37]:
# create XOR dataset
ds_train = XOR(features, labels)
ds_train, ds_valid = ds_train.split(0.6)
ds_valid, ds_test = ds_valid.split(0.5)

In [8]:
pp.pprint(ds_train.__dict__)

{'X': array([[1, 1],
       [0, 0],
       [0, 1],
       ..., 
       [1, 0],
       [1, 0],
       [1, 0]]),
 'X_labels': None,
 'X_space': VectorSpace(dim=2, dtype=float64),
 'X_topo_space': None,
 '_iter_data_specs': (VectorSpace(dim=2, dtype=float64), 'features'),
 '_iter_mode': <class 'pylearn2.utils.iteration.SequentialSubsetIterator'>,
 '_iter_targets': False,
 '_iter_topo': False,
 'compress': False,
 'data_specs': (CompositeSpace(VectorSpace(dim=2, dtype=float64), VectorSpace(dim=2, dtype=float64)),
                ('features', 'targets')),
 'design_loc': None,
 'preprocessor': None,
 'rng': <mtrand.RandomState object at 0x108af88d0>,
 'view_converter': None,
 'y': array([[1, 0],
       [1, 0],
       [0, 1],
       ..., 
       [0, 1],
       [0, 1],
       [0, 1]]),
 'y_labels': None}


In [9]:
# create hidden layer with 2 nodes, init weights in range -0.1 to 0.1 and add
# a bias with value 1
hidden1_layer = mlp.RectifiedLinear(layer_name='hidden1', dim=4, irange=.1, init_bias=1.)
hidden2_layer = mlp.RectifiedLinear(layer_name='hidden2', dim=2, irange=.1, init_bias=1.)

In [10]:
pp.pprint(hidden1_layer.__dict__)

{'W_lr_scale': None,
 'b': hidden1_b,
 'b_lr_scale': None,
 'copy_input': None,
 'dim': 4,
 'extensions': [],
 'include_prob': 1.0,
 'init_bias': 1.0,
 'irange': 0.1,
 'istdev': None,
 'layer_name': 'hidden1',
 'left_slope': 0.0,
 'mask_weights': None,
 'max_col_norm': None,
 'max_row_norm': None,
 'min_col_norm': None,
 'names_to_del': set([]),
 'sparse_init': None,
 'sparse_stdev': 1.0,
 'use_abs_loss': False,
 'use_bias': True}


In [11]:
# create Softmax output layer
output_layer = mlp.Softmax(2, 'output', irange=.1)

In [12]:
layers = [hidden1_layer, hidden2_layer, output_layer]

In [15]:
# termination criterion that stops after 50 epochs without
# any increase in misclassification on the validation set
termination_criterion = MonitorBased(channel_name='output_misclass', N=50, prop_decrease=0.0)

# momentum
initial_momentum = .5
final_momentum = .99
start = 1
saturate = 50
momentum_adjustor = learning_rule.MomentumAdjustor(final_momentum, start, saturate)
momentum_rule = learning_rule.Momentum(initial_momentum)

# learning rate
start = 1
saturate = 50
decay_factor = .1
learning_rate_adjustor = sgd.LinearDecayOverEpoch(start, saturate, decay_factor)

In [19]:
# create neural net
ann = mlp.MLP(layers, nvis=ds_train.X.shape[-1])

In [20]:
# create Stochastic Gradient Descent trainer 
trainer = sgd.SGD(learning_rate=.05, batch_size=10, monitoring_dataset=ds_valid,
                  termination_criterion=termination_criterion, learning_rule=momentum_rule)
trainer.setup(ann, ds_train)

Parameter and initial learning rate summary:
	hidden1_W: 0.05
	hidden1_b: 0.05
	hidden2_W: 0.05
	hidden2_b: 0.05
	softmax_b: 0.05
	softmax_W: 0.05
Compiling sgd_update...
Compiling sgd_update done. Time elapsed: 2.370012 seconds


In [23]:
# add monitor for saving the model with best score
monitor_save_best = best_params.MonitorBasedSaveBest('output_misclass', '/tmp/best.pkl')

In [24]:
ls /tmp/best.pkl

ls: /tmp/best.pkl: No such file or directory


In [25]:
# train neural net until the termination criterion is true
while True:
    trainer.train(dataset=ds_train)
    ann.monitor.report_epoch()
    ann.monitor()
    monitor_save_best.on_monitor(ann, ds_valid, trainer)
    if not trainer.continue_learning(ann):
        break
    momentum_adjustor.on_monitor(ann, ds_valid, trainer)
    learning_rate_adjustor.on_monitor(ann, ds_valid, trainer)

compiling begin_record_entry...
compiling begin_record_entry done. Time elapsed: 0.171775 seconds
Monitored channels: 
	hidden1_col_norms_max
	hidden1_col_norms_mean
	hidden1_col_norms_min
	hidden1_max_x_max_u
	hidden1_max_x_mean_u
	hidden1_max_x_min_u
	hidden1_mean_x_max_u
	hidden1_mean_x_mean_u
	hidden1_mean_x_min_u
	hidden1_min_x_max_u
	hidden1_min_x_mean_u
	hidden1_min_x_min_u
	hidden1_range_x_max_u
	hidden1_range_x_mean_u
	hidden1_range_x_min_u
	hidden1_row_norms_max
	hidden1_row_norms_mean
	hidden1_row_norms_min
	hidden2_col_norms_max
	hidden2_col_norms_mean
	hidden2_col_norms_min
	hidden2_max_x_max_u
	hidden2_max_x_mean_u
	hidden2_max_x_min_u
	hidden2_mean_x_max_u
	hidden2_mean_x_mean_u
	hidden2_mean_x_min_u
	hidden2_min_x_max_u
	hidden2_min_x_mean_u
	hidden2_min_x_min_u
	hidden2_range_x_max_u
	hidden2_range_x_mean_u
	hidden2_range_x_min_u
	hidden2_row_norms_max
	hidden2_row_norms_mean
	hidden2_row_norms_min
	learning_rate
	momentum
	objective
	output_col_norms_max
	output_col_n

  'indicating yaml_src')


Monitoring step:
	Epochs seen: 2
	Batches seen: 120
	Examples seen: 1200
	hidden1_col_norms_max: 0.0993952777922
	hidden1_col_norms_mean: 0.0690995991708
	hidden1_col_norms_min: 0.0274063974647
	hidden1_max_x_max_u: 1.13033747639
	hidden1_max_x_mean_u: 1.08408629912
	hidden1_max_x_min_u: 1.00481530462
	hidden1_mean_x_max_u: 1.06766737706
	hidden1_mean_x_mean_u: 1.03861226643
	hidden1_mean_x_min_u: 0.987008686719
	hidden1_min_x_max_u: 1.00751971447
	hidden1_min_x_mean_u: 0.995329317494
	hidden1_min_x_min_u: 0.968003873061
	hidden1_range_x_max_u: 0.123850472025
	hidden1_range_x_mean_u: 0.0887569816264
	hidden1_range_x_min_u: 0.0368114315628
	hidden1_row_norms_max: 0.110849026357
	hidden1_row_norms_mean: 0.104245211493
	hidden1_row_norms_min: 0.0976413966298
	hidden2_col_norms_max: 0.195894975681
	hidden2_col_norms_mean: 0.194868969777
	hidden2_col_norms_min: 0.193842963872
	hidden2_max_x_max_u: 0.680767082261
	hidden2_max_x_mean_u: 0.660043854352
	hidden2_max_x_min_u: 0.639320626443
	hid

In [26]:
ls /tmp/best.pkl

/tmp/best.pkl


In [52]:
# load the best model
ann = serial.load('/tmp/best.pkl')

In [53]:
inputs = np.array([[0, 0]])
print ann.fprop(theano.shared(inputs, name='inputs')).eval()
inputs = np.array([[0, 1]])
print ann.fprop(theano.shared(inputs, name='inputs')).eval()
inputs = np.array([[1, 0]])
print ann.fprop(theano.shared(inputs, name='inputs')).eval()
inputs = np.array([[1, 1]])
print ann.fprop(theano.shared(inputs, name='inputs')).eval()

[[ 0.51886286  0.48113714]]
[[ 0.49352323  0.50647677]]
[[ 0.49364474  0.50635526]]
[[ 0.49079833  0.50920167]]


In [57]:
print ann.fprop(theano.shared(np.array([[0, 0]]))).eval()

[[ 0.51886286  0.48113714]]


In [42]:
pp.pprint(ann.__dict__)

{'_input_source': 'features',
 '_nested': False,
 '_tag': defaultdict(<type 'dict'>, {'MonitorBasedSaveBest': {'best_cost': array(0.24000000000000007)}}),
 '_target_source': 'targets',
 '_test_batch_size': 10,
 'batch_size': None,
 'extensions': [],
 'force_batch_size': None,
 'freeze_set': set([]),
 'input_space': VectorSpace(dim=2, dtype=float64),
 'layer_name': None,
 'layer_names': set(['hidden1', 'hidden2', 'output']),
 'layers': [<pylearn2.models.mlp.RectifiedLinear object at 0x10c033950>,
            <pylearn2.models.mlp.RectifiedLinear object at 0x10c033610>,
            <pylearn2.models.mlp.Softmax object at 0x108b26fd0>],
 'monitor': <pylearn2.monitor.Monitor object at 0x10e180d10>,
 'monitor_targets': True,
 'names_to_del': set([]),
 'rng': <mtrand.RandomState object at 0x10e180f10>,
 'seed': [2013, 1, 4],
 'yaml_src': '!pkl: "/tmp/best.pkl"'}


In [48]:
pp.pprint(ann.layers[0].__dict__)

{'W_lr_scale': None,
 'b': hidden1_b,
 'b_lr_scale': None,
 'copy_input': None,
 'dim': 4,
 'extensions': [],
 'include_prob': 1.0,
 'init_bias': 1.0,
 'input_dim': 2,
 'input_space': VectorSpace(dim=2, dtype=float64),
 'irange': 0.1,
 'istdev': None,
 'layer_name': 'hidden1',
 'left_slope': 0.0,
 'mask_weights': None,
 'max_col_norm': None,
 'max_row_norm': None,
 'min_col_norm': None,
 'mlp': <pylearn2.models.mlp.MLP object at 0x10c033910>,
 'names_to_del': set([]),
 'output_space': VectorSpace(dim=4, dtype=float64),
 'requires_reformat': False,
 'sparse_init': None,
 'sparse_stdev': 1.0,
 'transformer': <pylearn2.linear.matrixmul.MatrixMul object at 0x10c033a50>,
 'use_abs_loss': False,
 'use_bias': True}


In [32]:
# function for classifying a input vector
def classify(inp):
    inp = np.asarray(inp)
    inp.shape = (1, ds_train.nr_inputs)
    return np.argmax(ann.fprop(theano.shared(inp, name='inputs')).eval())
 
# function for calculating and printing the models accuracy on a given dataset
def score(dataset):
    nr_correct = 0
    for features, label in dataset:
        if classify(features) == np.argmax(label):
            nr_correct += 1
    print '%s/%s correct' % (nr_correct, len(dataset))

In [40]:
print 'Accuracy of train set:'
score(ds_train)
print 'Accuracy of validation set:'
score(ds_valid)
print 'Accuracy of test set:'
score(ds_test)

Accuracy of train set:
445/600 correct
Accuracy of validation set:
152/200 correct
Accuracy of test set:
148/200 correct


In [41]:
ds = XOR(features, labels)

In [50]:
# create hidden layer with 2 nodes, init weights in range -0.1 to 0.1 and add
# a bias with value 1
hidden_layer = mlp.Sigmoid(layer_name='hidden', dim=2, irange=.1, init_bias=1.)

# create Softmax output layer
output_layer = mlp.Softmax(2, 'output', irange=.1)

# create Stochastic Gradient Descent trainer that runs for 400 epochs
trainer = sgd.SGD(learning_rate=.05, batch_size=10, termination_criterion=EpochCounter(400))

layers = [hidden_layer, output_layer]

# create neural net that takes two inputs
ann = mlp.MLP(layers, nvis=2)
trainer.setup(ann, ds)

# train neural net until the termination criterion is true
while True:
    trainer.train(dataset=ds)
    ann.monitor.report_epoch()
    ann.monitor()
    if not trainer.continue_learning(ann):
        break

Parameter and initial learning rate summary:
	hidden_W: 0.05
	hidden_b: 0.05
	softmax_b: 0.05
	softmax_W: 0.05
Compiling sgd_update...
Compiling sgd_update done. Time elapsed: 0.359824 seconds
compiling begin_record_entry...
compiling begin_record_entry done. Time elapsed: 0.022326 seconds
Monitored channels: 
Compiling accum...
Compiling accum done. Time elapsed: 0.000774 seconds
Monitoring step:
	Epochs seen: 1
	Batches seen: 100
	Examples seen: 1000
Monitoring step:
	Epochs seen: 2
	Batches seen: 200
	Examples seen: 2000
Monitoring step:
	Epochs seen: 3
	Batches seen: 300
	Examples seen: 3000
Monitoring step:
	Epochs seen: 4
	Batches seen: 400
	Examples seen: 4000
Monitoring step:
	Epochs seen: 5
	Batches seen: 500
	Examples seen: 5000
Monitoring step:
	Epochs seen: 6
	Batches seen: 600
	Examples seen: 6000
Monitoring step:
	Epochs seen: 7
	Batches seen: 700
	Examples seen: 7000
Monitoring step:
	Epochs seen: 8
	Batches seen: 800
	Examples seen: 8000
Monitoring step:
	Epochs seen: 9

In [51]:
inputs = np.array([[0, 0]])
print ann.fprop(theano.shared(inputs, name='inputs')).eval()
inputs = np.array([[0, 1]])
print ann.fprop(theano.shared(inputs, name='inputs')).eval()
inputs = np.array([[1, 0]])
print ann.fprop(theano.shared(inputs, name='inputs')).eval()
inputs = np.array([[1, 1]])
print ann.fprop(theano.shared(inputs, name='inputs')).eval()

[[ 0.50558197  0.49441803]]
[[ 0.50549487  0.49450513]]
[[ 0.50510029  0.49489971]]
[[ 0.50500916  0.49499084]]
