
Commit f527528: Merge 9485e14 into d221c57
alexjc committed May 23, 2015
2 parents: d221c57 + 9485e14
Showing 5 changed files with 100 additions and 25 deletions.
50 changes: 35 additions & 15 deletions examples/bench_cifar10.py
@@ -1,27 +1,34 @@
# -*- coding: utf-8 -*-
from __future__ import (absolute_import, unicode_literals, print_function)

import sys
import pickle
import logging
import numpy as np

logging.basicConfig(format="%(message)s", level=logging.DEBUG, stream=sys.stdout)

PRETRAIN = False


def load(name):
    print("\t"+name)
    try:
        with open(name, 'rb') as f:
            return pickle.load(f, encoding='latin1')
            return pickle.load(f) # , encoding='latin1')
    except IOError:
        import gzip
        with gzip.open(name+'.gz', 'rb') as f:
            return pickle.load(f, encoding='latin1')
            return pickle.load(f) # , encoding='latin1')

print("Loading...")
dataset1 = load('data_batch_1')
dataset2 = load('data_batch_2')
dataset3 = load('data_batch_3')
print("")

data_train = np.vstack([dataset1['data'], dataset2['data']])
labels_train = np.hstack([dataset1['labels'], dataset2['labels']])
data_train = np.vstack([dataset1['data']]) #, dataset2['data']])
labels_train = np.hstack([dataset1['labels']]) #, dataset2['labels']])

data_train = data_train.astype('float') / 255.
labels_train = labels_train
@@ -36,17 +43,30 @@ def load(name):
logging.basicConfig(format="%(message)s", level=logging.DEBUG, stream=sys.stdout)

from sknn import mlp
net = mlp.Classifier(
    layers=[
        mlp.Layer("Rectifier", units=n_feat*2/3),
        mlp.Layer("Rectifier", units=n_feat*1/3),
        mlp.Layer("Softmax", units=n_targets)],
    n_iter=50,
    n_stable=10,
    learning_rate=0.001,
    valid_size=0.1,
    verbose=1)
net.fit(data_train, labels_train)
nn = mlp.Classifier(
    layers=[
        mlp.Layer("Sigmoid", units=128),
        mlp.Layer("Sigmoid", units=128),
        mlp.Layer("Softmax", units=n_targets)],
    n_iter=4,
    n_stable=4,
    learning_rate=0.001,
    valid_size=0.5,
    verbose=1)

if PRETRAIN:
    from sknn import ae
    ae = ae.AutoEncoder(
        layers=[
            ae.Layer("Sigmoid", units=128),
            ae.Layer("Sigmoid", units=128)],
        learning_rate=0.002,
        n_iter=10,
        verbose=1)
    ae.fit(data_train)
    ae.transfer(nn)

nn.fit(data_train, labels_train)

from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
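The diff is truncated just after these imports. Evaluation presumably continues in the usual scikit-learn style; the following sketch shows one plausible continuation, where `data_test` and `labels_test` are assumed names for a held-out batch prepared like the training arrays (not shown in this commit):

    # Hypothetical continuation of the benchmark script.
    y_pred = nn.predict(data_test)
    print(classification_report(labels_test, y_pred))
    print(confusion_matrix(labels_test, y_pred))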
41 changes: 35 additions & 6 deletions sknn/ae.py
@@ -12,7 +12,7 @@

import sklearn

from .pywrap2 import (autoencoder, transformer_dataset, blocks, ae_costs, corruption)
from .pywrap2 import (autoencoder, sgd, transformer_dataset, blocks, ae_costs, corruption)
from . import nn


@@ -80,8 +80,10 @@ def __init__(self,
            raise NotImplementedError("AutoEncoder layer type `%s` is not implemented." % type)
        if cost not in ['msre', 'mbce']:
            raise NotImplementedError("Error type '%s' is not implemented." % cost)
        if activation not in ['Sigmoid', 'Tanh']:
            raise NotImplementedError("Activation type '%s' is not implemented." % activation)

        self.activation = activation.lower()
        self.activation = activation
        self.type = type
        self.name = name
        self.units = units
@@ -115,6 +117,19 @@ def fit(self, X):
        self : object
            Returns this instance.
        """
        sgd.log.setLevel(logging.WARNING)
        num_samples, data_size = X.shape[0], X.size

        log.info("Training on dataset of {:,} samples with {:,} total size.".format(num_samples, data_size))
        if self.n_iter:
            log.debug(" - Terminating loop after {} total iterations.".format(self.n_iter))
        if self.n_stable:
            log.debug(" - Early termination after {} stable iterations.".format(self.n_stable))

        if self.verbose:
            log.debug("\nEpoch    Validation Error    Time"
                      "\n---------------------------------")

        input_size = [X.shape[1]] + [l.units for l in self.layers[:-1]]
        ae_layers = []
        for v, l in zip(input_size, self.layers):
@@ -147,14 +162,28 @@ def transform(self, X):
        assert self.dca is not None, "The auto-encoder has not been trained yet."
        return self.dca.perform(X)

    def transfer(self, nn):
        for a, l in zip(self.layers, nn.layers):
            assert a.activation == l.type,\
                "Mismatch in activation types in target MLP; expected `%s` but found `%s`."\
                % (a.activation, l.type)
            assert a.units == l.units,\
                "Different number of units in target MLP; expected `%i` but found `%i`."\
                % (a.units, l.units)

        nn.weights = []
        for a in self.dca.autoencoders:
            nn.weights.append((a.weights.get_value(), a.hidbias.get_value()))

    def _create_ae_layer(self, size, layer):
        """Construct an internal pylearn2 layer based on the requested layer type.
        """
        activation = layer.activation.lower()
        if layer.type == 'autoencoder':
            return autoencoder.Autoencoder(size,
                                           layer.units,
                                           layer.activation,
                                           layer.activation,
                                           activation,
                                           activation,
                                           layer.tied_weights,
                                           rng=self.random_state)
        if layer.type == 'denoising':
@@ -163,8 +192,8 @@ def _create_ae_layer(self, size, layer):
            return autoencoder.DenoisingAutoencoder(corruptor,
                                                    size,
                                                    layer.units,
                                                    layer.activation,
                                                    layer.activation,
                                                    activation,
                                                    activation,
                                                    tied_weights=layer.tied_weights,
                                                    rng=self.random_state)

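The new `transfer` method establishes a simple hand-off contract: each auto-encoder layer must match the corresponding layer of the target MLP in both activation type and unit count, and the trained weights and hidden biases are then copied into `nn.weights` as the MLP's initial parameters. A minimal sketch of the intended pretrain-then-finetune flow (data shapes and hyperparameters here are illustrative assumptions, not part of this commit):

    import numpy as np
    from sknn import ae, mlp

    X = np.random.uniform(size=(100, 16))     # unlabeled data for pretraining
    y = np.random.randint(0, 2, size=(100,))  # labels for fine-tuning

    pretrainer = ae.AutoEncoder(layers=[ae.Layer("Sigmoid", units=8)], n_iter=5)
    pretrainer.fit(X)

    nn = mlp.Classifier(layers=[
        mlp.Layer("Sigmoid", units=8),        # must match the AE layer exactly
        mlp.Layer("Softmax", units=2)])
    pretrainer.transfer(nn)                   # seeds nn.weights from the AE
    nn.fit(X, y)                              # fine-tune from pretrained weights

Note that `zip` pairs layers from the front, so a classifier may have extra layers (here the Softmax output) beyond those pretrained; only the matched prefix is checked and transferred.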
1 change: 0 additions & 1 deletion sknn/mlp.py
@@ -42,7 +42,6 @@ def _setup(self):
        self.trainer = None
        self.cost = None
        self.train_set = None
        self.best_valid_error = float("inf")

    def _create_mlp_trainer(self, dataset):
        sgd.log.setLevel(logging.WARNING)
5 changes: 3 additions & 2 deletions sknn/nn.py
@@ -469,6 +469,7 @@ def _create_trainer(self, dataset, cost):
    def _train_layer(self, trainer, layer, dataset):
        # Bug in PyLearn2 that has some unicode channels, can't sort.
        layer.monitor.channels = {str(k): v for k, v in layer.monitor.channels.items()}
        best_valid_error = float("inf")

        for i in itertools.count(1):
            start = time.time()
@@ -481,12 +482,12 @@ def _train_layer(self, trainer, layer, dataset):
            objective = layer.monitor.channels.get('objective', None)
            if objective:
                avg_valid_error = objective.val_shared.get_value()
                self.best_valid_error = min(self.best_valid_error, avg_valid_error)
                best_valid_error = min(best_valid_error, avg_valid_error)
            else:
                # 'objective' channel is only defined with validation set.
                avg_valid_error = None

            best_valid = bool(self.best_valid_error == avg_valid_error)
            best_valid = bool(best_valid_error == avg_valid_error)
            log.debug("{:>5} {}{}{} {:>3.1f}s".format(
                i,
                ansi.GREEN if best_valid else "",
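Moving `best_valid_error` off the instance (see the matching deletion in `sknn/mlp.py` above) and into `_train_layer` gives every training run a fresh baseline. Roughly, the failure mode with the old attribute (illustrative trace, not from the source):

    # est.fit(X_a, y_a)   # run 1 drives est.best_valid_error down
    # est.fit(X_b, y_b)   # run 2's epochs were judged against run 1's stale
    #                     # best, so genuine improvements were never flagged

With a local variable, repeated fits, and the layer-wise auto-encoder pretraining added in this change set, each track their own best validation error.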
28 changes: 27 additions & 1 deletion sknn/tests/test_ae.py
@@ -4,7 +4,7 @@
import numpy

from sknn.ae import AutoEncoder as AE, Layer as L

from sknn import mlp

class TestAutoEncoder(unittest.TestCase):

@@ -17,6 +17,29 @@ def test_FitData(self):
        ae = AE(layers=[L("Sigmoid", units=8)], n_iter=1)
        ae.fit(X)

    def test_FitVerbose(self):
        X = numpy.zeros((8,4))
        ae = AE(layers=[L("Sigmoid", units=8)], n_iter=1, verbose=1)
        ae.fit(X)

    def test_TransferSuccess(self):
        X = numpy.zeros((8,4))
        ae = AE(layers=[L("Tanh", units=4)], n_iter=1)
        ae.fit(X)

        nn = mlp.MultiLayerPerceptron(
            layers=[mlp.Layer("Tanh", units=4)])
        ae.transfer(nn)

    def test_TransferFailure(self):
        X = numpy.zeros((8,4))
        ae = AE(layers=[L("Tanh", units=8)], n_iter=1)
        ae.fit(X)

        nn = mlp.MultiLayerPerceptron(
            layers=[mlp.Layer("Tanh", units=4)])
        assert_raises(AssertionError, ae.transfer, nn)


class TestParameters(unittest.TestCase):

@@ -43,3 +66,6 @@ def test_UnknownCostFunction(self):

    def test_UnknownType(self):
        assert_raises(NotImplementedError, L, "Sigmoid", type="unknown")

    def test_UnknownActivation(self):
        assert_raises(NotImplementedError, L, "Unknown")
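The new `test_UnknownActivation` case pins down the constructor guard added in `sknn/ae.py`: auto-encoder layers accept only `Sigmoid` and `Tanh` activations, passed as the first positional argument. A quick illustration (behaviour inferred from the guard shown above):

    from sknn.ae import Layer

    Layer("Sigmoid", units=8)        # accepted
    Layer("Tanh", type='denoising')  # accepted; 'denoising' is a valid layer type
    try:
        Layer("Rectifier", units=8)  # rejected by the new activation check
    except NotImplementedError as e:
        print(e)                     # Activation type 'Rectifier' is not implemented.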
