In [1]:
'''Trains a simple binarized fully connected NN on the MNIST dataset.
Modified from keras' examples/mnist_mlp.py
Gets to 97.9% test accuracy after 20 epochs using theano backend
(the run recorded in this notebook reports a test accuracy of 0.9657).
'''


from __future__ import print_function
import numpy as np
np.random.seed(1337)  # for reproducibility

import keras.backend as K
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, BatchNormalization
from keras.optimizers import SGD, Adam, RMSprop
from keras.callbacks import LearningRateScheduler
from keras.utils import np_utils

from binary_ops import binary_tanh as binary_tanh_op
from binary_layers import BinaryDense, Clip

from keras.models import load_model


class DropoutNoScale(Dropout):
    '''Dropout variant that does not rescale the surviving activations.

    Stock Keras Dropout scales its input during the training phase, which is
    undesirable here; this layer multiplies the output of ``K.dropout`` by
    ``(1 - rate)`` to compensate.
    '''
    def call(self, inputs, training=None):
        # Rates outside the open interval (0, 1) make the layer a pass-through.
        if not (0. < self.rate < 1.):
            return inputs

        mask_shape = self._get_noise_shape(inputs)

        def unscaled_dropout():
            dropped = K.dropout(inputs, self.rate, mask_shape, seed=self.seed)
            return dropped * (1 - self.rate)

        # Units are only dropped in the training phase; otherwise the inputs
        # are returned unchanged.
        return K.in_train_phase(unscaled_dropout, inputs, training=training)

def binary_tanh(x):
    '''Forward ``x`` to ``binary_ops.binary_tanh`` and return its result.

    NOTE(review): presumably this wrapper exists so the activation is a local
    function named ``binary_tanh`` (e.g. for ``custom_objects`` when reloading
    a saved model) -- confirm.
    '''
    activated = binary_tanh_op(x)
    return activated


# --- training configuration -------------------------------------------------
batch_size = 100
epochs = 20
nb_classes = 10
input_num = 784  # number of features per flattened input image

# Both settings are forwarded to the BinaryDense layers.
H = 'Glorot'
kernel_lr_multiplier = 'Glorot'

# network
num_unit = 1024
num_hidden = 3
use_bias = False

# learning rate schedule: exponential decay so that after `epochs` epochs the
# rate has gone from lr_start to lr_end
lr_start = 1e-3
lr_end = 1e-4
lr_decay = (lr_end / lr_start)**(1. / epochs)

# BN
epsilon = 1e-4
momentum = 0.9

# dropout
drop_in = 0.2
drop_hidden = 0.5

# --- data -------------------------------------------------------------------
# the data, shuffled and split between train and test sets
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten every image into a vector of `input_num` features. Using -1 for the
# first axis avoids hard-coding the number of samples in each split.
X_train = X_train.reshape(-1, input_num).astype('float32') / 255
X_test = X_test.reshape(-1, input_num).astype('float32') / 255
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

Using Theano backend.
Can not use cuDNN on context None: cannot compile with cuDNN. We got this error:
/usr/bin/ld: cannot find -lcudnn
collect2: ld returned 1 exit status

Preallocating 4875/12189 Mb (0.400000) on cuda0
Mapped name None to device cuda0: TITAN X (Pascal) (0000:02:00.0)


60000 train samples
10000 test samples


In [2]:
# Sanity check: show the shape of the training inputs.
print(X_train.shape)

(60000, 784)


In [3]:
print(X_test.shape)
# Binarize the test inputs: any strictly positive value becomes True.
X_test = X_test>0
print(type(X_test))
print(type(X_test[0,0]))
# Map False/True to -1/+1. The boolean-mask assignment replaces an
# element-by-element Python loop and produces the same integer array.
X_test = X_test.astype(int)
X_test[X_test == 0] = -1

(10000, 784)
<type 'numpy.ndarray'>
<type 'numpy.bool_'>


In [4]:
print(X_train.shape)
# Binarize the training inputs: any strictly positive value becomes True.
X_train = X_train>0
print(type(X_train))
print(type(X_train[0,0]))
# Map False/True to -1/+1. The boolean-mask assignment replaces an
# element-by-element Python loop and produces the same integer array.
X_train = X_train.astype(int)
X_train[X_train == 0] = -1

(60000, 784)
<type 'numpy.ndarray'>
<type 'numpy.bool_'>


In [5]:

# convert class vectors to binary class matrices, then rescale the 0/1 entries
# to -1/+1 targets
Y_train = np_utils.to_categorical(y_train, nb_classes) * 2 - 1 # -1 or 1 for hinge loss
Y_test = np_utils.to_categorical(y_test, nb_classes) * 2 - 1

model = Sequential()
# Input dropout is disabled in this experiment.
# model.add(DropoutNoScale(drop_in, input_shape=(784,), name='drop0'))
# NOTE(review): dense0 does not pass H / kernel_lr_multiplier, unlike the other
# BinaryDense layers below -- confirm this is intentional.
model.add(BinaryDense(num_unit, input_shape=(input_num,), use_bias=use_bias, name="dense0"))
model.add(Activation(binary_tanh, name='act0'))
# Hidden stack: num_hidden x (BinaryDense -> binary_tanh); batch norm and
# dropout are left commented out.
for i in range(num_hidden):
    model.add(BinaryDense(num_unit, H=H, kernel_lr_multiplier=kernel_lr_multiplier, use_bias=use_bias,
              name='dense{}'.format(i+1)))
#     model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, name='bn{}'.format(i+1)))
    model.add(Activation(binary_tanh, name='act{}'.format(i+1)))
#     model.add(DropoutNoScale(drop_hidden, name='drop{}'.format(i+1)))
# Output layer: one unit per class (nb_classes rather than a literal 10, so it
# always matches the width of Y_train / Y_test).
model.add(BinaryDense(nb_classes, H=H, kernel_lr_multiplier=kernel_lr_multiplier, use_bias=use_bias,
          name='dense'))

# model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, name='bn'))
# model.add(Activation(binary_tanh, name='act'))
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense0 (BinaryDense)         (None, 1024)              802816    
_________________________________________________________________
act0 (Activation)            (None, 1024)              0         
_________________________________________________________________
dense1 (BinaryDense)         (None, 1024)              1048576   
_________________________________________________________________
act1 (Activation)            (None, 1024)              0         
_________________________________________________________________
dense2 (BinaryDense)         (None, 1024)              1048576   
_________________________________________________________________
act2 (Activation)            (None, 1024)              0         
_________________________________________________________________
dense3 (BinaryDense)         (None, 1024)              1048576   
__________

In [6]:
def lr_schedule(epoch):
    '''Learning rate for `epoch`: lr_start decayed by lr_decay once per epoch.'''
    return lr_start * lr_decay ** epoch


opt = Adam(lr=lr_start)
model.compile(optimizer=opt, loss='squared_hinge', metrics=['acc'])

# Example (disabled): saving the model and loading it back requires passing the
# custom layers and activation through `custom_objects`.
#model.save('mlp.h5')
#model = load_model('mlp.h5', custom_objects={'DropoutNoScale': DropoutNoScale,
#                                             'BinaryDense': BinaryDense,
#                                             'Clip': Clip, 
#                                             'binary_tanh': binary_tanh})

lr_scheduler = LearningRateScheduler(lr_schedule)
history = model.fit(
    X_train, Y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(X_test, Y_test),
    callbacks=[lr_scheduler]
)

# score[0] is the loss, score[1] the 'acc' metric requested in compile().
score = model.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])

Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test score: 0.054517130958503
Test accuracy: 0.9657


In [9]:
# Names of all BinaryDense layers in model order. Deriving them from the model
# keeps this cell correct if num_hidden (and thus the layer names) changes.
layer_list = [layer.name for layer in model.layers if isinstance(layer, BinaryDense)]

# Evaluate each layer's kernel to a NumPy array. K.eval is used instead of the
# tensor's own .eval() so the cell does not depend on the backend's tensor API.
# NOTE(review): get_kernel() comes from binary_layers; presumably it returns
# the binarized kernel tensor -- confirm.
kernel_list = []
for layer_id in layer_list:
    layer = model.get_layer(name=layer_id)
    kernel_list.append(K.eval(layer.get_kernel()))

In [11]:
import pickle

# Show the shape of every extracted kernel matrix.
for kernel in kernel_list:
    print(kernel.shape)

# Persist the list of kernel arrays. The with-block closes the file on exit,
# so no explicit close() call is needed.
with open('kernel_matrix_3hidden_1024_9657.pkl', 'wb') as f:
    pickle.dump(kernel_list, f)

(784, 1024)
(1024, 1024)
(1024, 1024)
(1024, 1024)
(1024, 10)
