In [5]:
from __future__ import absolute_import
from __future__ import print_function
import numpy as np
np.random.seed(1337) # for reproducibility

from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import containers
from keras.layers.core import Dense, AutoEncoder
from keras.activations import sigmoid
from keras.utils import np_utils
from keras.optimizers import RMSprop

In [9]:
# ---------------------------------------------------------------------------
# Stacked autoencoder on MNIST: greedy layer-wise pretraining of three sigmoid
# layers, followed by supervised fine-tuning with a softmax classifier on top.
# ---------------------------------------------------------------------------

# Hyper-parameters.
batch_size = 64
nb_classes = 10
nb_epoch = 1
# Layer widths: the 784-wide input followed by three hidden layers.
nb_hidden_layers = [784, 600, 500, 400]

# Load the train/test split provided by mnist.load_data(), flatten every
# sample to a 784-vector and divide the float32 values by 255.
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.reshape(-1, 784).astype("float32") / 255.0
X_test = X_test.reshape(-1, 784).astype("float32") / 255.0
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# Turn the integer class vectors into binary class matrices.
Y_train, Y_test = (np_utils.to_categorical(labels, nb_classes)
                   for labels in (y_train, y_test))

# ---- Layer-wise pretraining ------------------------------------------------
# Each autoencoder is fitted to reproduce its own input; its output then
# becomes the training data for the next, narrower autoencoder.
encoders = []
X_train_tmp = np.copy(X_train)
layer_shapes = zip(nb_hidden_layers[:-1], nb_hidden_layers[1:])
for i, (n_in, n_out) in enumerate(layer_shapes, start=1):
    print('Training the layer {}: Input {} -> Output {}'.format(i, n_in, n_out))

    # Encoder is built before the decoder; keep this order so the layers are
    # initialised in the same sequence on every run.
    encoder = containers.Sequential([Dense(n_out, input_dim=n_in, activation='sigmoid')])
    decoder = containers.Sequential([Dense(n_in, input_dim=n_out, activation='sigmoid')])

    # NOTE(review): the next iteration feeds ae.predict() into a layer with
    # input_dim == n_out, so output_reconstruction=False is relied upon to make
    # predict() return the encoder output - confirm against the installed Keras.
    ae = Sequential()
    ae.add(AutoEncoder(encoder=encoder,
                       decoder=decoder,
                       output_reconstruction=False))

    optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-06)
    ae.compile(loss='mean_squared_error', optimizer=optimizer)
    ae.fit(X_train_tmp, X_train_tmp, batch_size=batch_size, nb_epoch=nb_epoch)

    # Keep the trained encoder and replace the training data with ae's output.
    encoders.append(ae.layers[0].encoder)
    X_train_tmp = ae.predict(X_train_tmp)

# ---- Fine-tuning -----------------------------------------------------------
# Stack the pretrained encoders, add a softmax output layer and train the
# whole network on the labels.
model = Sequential()
for encoder in encoders:
    model.add(encoder)
model.add(Dense(nb_classes, input_dim=nb_hidden_layers[-1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

# Test-set metrics before any supervised training.
score = model.evaluate(X_test, Y_test, show_accuracy=True, verbose=0)
print('Test score before fine tuning:', score[0])
print('Test accuracy before fine tuning:', score[1])

model.fit(X_train, Y_train,
          batch_size=batch_size,
          nb_epoch=nb_epoch,
          show_accuracy=True,
          validation_data=(X_test, Y_test))

# Test-set metrics after supervised training.
score = model.evaluate(X_test, Y_test, show_accuracy=True, verbose=0)
print('Test score after fine tuning:', score[0])
print('Test accuracy after fine tuning:', score[1])

60000 train samples
10000 test samples
Training the layer 1: Input 784 -> Output 600
Epoch 1/1
Training the layer 2: Input 600 -> Output 500
Epoch 1/1
Training the layer 3: Input 500 -> Output 400
Epoch 1/1
Test score before fine tuning: 2.53544282379
Test accuracy before fine tuning: 0.1135
Train on 60000 samples, validate on 10000 samples
Epoch 1/1
Test score after fine tuning: 0.355791935372
Test accuracy after fine tuning: 0.8968


In [13]:
# Baseline run: a 784-600-500-400 sigmoid stack with a softmax classifier on
# top, trained directly on the labels. No autoencoder pretraining happens in
# this cell - the hidden layers start from their initial weights.
batch_size = 64
nb_classes = 10
nb_epoch = 1
# Layer widths: the 784-wide input followed by three hidden layers.
nb_hidden_layers = [784, 600, 500, 400]

# Load the train/test split provided by mnist.load_data(), flatten every
# sample to a 784-vector and divide the float32 values by 255.
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.reshape(-1, 784)
X_test = X_test.reshape(-1, 784)
X_train = X_train.astype("float32") / 255.0
X_test = X_test.astype("float32") / 255.0
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

# Build the hidden layers without pretraining.
# (The unused copy of X_train and the duplicate nb_hidden_layers assignment
# that used to sit here were removed: nothing in this cell reads them.)
encoders = []
for i, (n_in, n_out) in enumerate(zip(nb_hidden_layers[:-1], nb_hidden_layers[1:]), start=1):
    # The message says "Training", but the layer is only constructed here; the
    # text is kept unchanged so the printed log still matches the recorded output.
    print('Training the layer {}: Input {} -> Output {}'.format(i, n_in, n_out))
    encoder = containers.Sequential([Dense(n_out, input_dim=n_in, activation='sigmoid')])
    encoders.append(encoder)

# Supervised training of the whole stack (reported as "fine tuning" in the output).
model = Sequential()
for encoder in encoders:
    model.add(encoder)
model.add(Dense(nb_classes, input_dim=nb_hidden_layers[-1], activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

# Test-set metrics before any training.
score = model.evaluate(X_test, Y_test, show_accuracy=True, verbose=0)
print('Test score before fine tuning:', score[0])
print('Test accuracy before fine tuning:', score[1])
model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch,
          show_accuracy=True, validation_data=(X_test, Y_test))

# Test-set metrics after training.
score = model.evaluate(X_test, Y_test, show_accuracy=True, verbose=0)
print('Test score after fine tuning:', score[0])
print('Test accuracy after fine tuning:', score[1])

60000 train samples
10000 test samples
Training the layer 1: Input 784 -> Output 600
Training the layer 2: Input 600 -> Output 500
Training the layer 3: Input 500 -> Output 400
Test score before fine tuning: 2.42104121437
Test accuracy before fine tuning: 0.0974
Train on 60000 samples, validate on 10000 samples
Epoch 1/1
Test score after fine tuning: 0.354463977933
Test accuracy after fine tuning: 0.8931


In [11]:
# Scratch check: every entry of nb_hidden_layers except the last, i.e. the
# n_in values the layer-building loops iterate over.
nb_hidden_layers[:-1]

[784, 600, 500]

In [12]:
# Scratch check: the last entry of nb_hidden_layers, which is passed as
# input_dim to the softmax Dense layer.
nb_hidden_layers[-1]

400