In [1]:
import time
import numpy as np
import matplotlib
matplotlib.use('Agg')

import plotly.plotly as py
import plotly.graph_objs as go

from matplotlib import pyplot as plt
from keras.utils import np_utils
import keras.callbacks as cb
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import RMSprop
from keras.datasets import mnist

Using Theano backend.


I'm not using many of these functions, but I'll just keep them here for reference

In [2]:
class LossHistory(cb.Callback):
    def on_train_begin(self, logs={}):
        self.losses = []

    def on_batch_end(self, batch, logs={}):
        batch_loss = logs.get('loss')
        self.losses.append(batch_loss)

def load_data():
    print 'Loading data...'
    (X_train, y_train), (X_test, y_test) = mnist.load_data()

    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')

    X_train /= 255
    X_test /= 255

    y_train = np_utils.to_categorical(y_train, 10)
    y_test = np_utils.to_categorical(y_test, 10)

    X_train = np.reshape(X_train, (60000, 784))
    X_test = np.reshape(X_test, (10000, 784))

    print 'Data loaded.'
    return [X_train, X_test, y_train, y_test]


def init_model():
    start_time = time.time()
    print 'Compiling Model ... '
    model = Sequential()
    model.add(Dense(500, input_dim=784))
    model.add(Activation('relu'))
    model.add(Dropout(0.4))
    model.add(Dense(300))
    model.add(Activation('relu'))
    model.add(Dropout(0.4))
    model.add(Dense(10))
    model.add(Activation('softmax'))

    rms = RMSprop()
    model.compile(loss='categorical_crossentropy', optimizer=rms,
      metrics=['accuracy'])
    print 'Model compiled in {0} seconds'.format(time.time() - start_time)
    return model


def run_network(data=None, model=None, epochs=20, batch=256):
    try:
        start_time = time.time()
        if data is None:
            X_train, X_test, y_train, y_test = load_data()
        else:
            X_train, X_test, y_train, y_test = data

        if model is None:
            model = init_model()

        history = LossHistory()

        print 'Training model...'
        model.fit(X_train, y_train, nb_epoch=epochs, batch_size=batch,
                  callbacks=[history],
                  validation_data=(X_test, y_test), verbose=2)

        print "Training duration : {0}".format(time.time() - start_time)
        score = model.evaluate(X_test, y_test, batch_size=16)

        print "Network's test score [loss, accuracy]: {0}".format(score)
        return model, history.losses
    except KeyboardInterrupt:
        print ' KeyboardInterrupt'
        return model, history.losses


def predict(model, images):
  """
  Takes an array of images. Obviously dimensions must match training set.
  """
  return model.predict_classes(images)


def display_classes(png, images, classes, ncol=4):
  """
  Draw a number of images and their predictions
  Example:
  images = data[1][:12]
  classes = model.predict_classes('classes.png', images)
  """
  fig = plt.figure()
  nrow = len(images) / ncol
  if len(images) % ncol > 0: nrow = nrow + 1

  def draw(i):
    plt.subplot(nrow,ncol,i)
    plt.imshow(images[i].reshape(28,28), cmap='gray', interpolation='none')
    plt.title('Predicted: %s' % classes[i])
  [ draw(i) for i in range(0,len(images)) ]
  plt.tight_layout()
  plt.savefig(png)

def plot_losses(png, losses):
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(losses)
    ax.set_title('Loss per batch')
    plt.savefig(png)

First, I'll load my data

In [4]:

(X_train, y_train), (X_test, y_test) = mnist.load_data()

X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

X_train /= 255
X_test /= 255

y_train = np_utils.to_categorical(y_train, 10)
y_test = np_utils.to_categorical(y_test, 10)

#X_train = np.reshape(X_train, (60000, 784))
#X_test = np.reshape(X_test, (10000, 784))

(60000,)

In [8]:
import pandas as pd

X_train.to_csv('xtrain.csv')
X_test.to_csv('xtest.csv')
y_train.to_csv('ytrain.csv')
y_test.to_csv('ytest.csv')

AttributeError: 'numpy.ndarray' object has no attribute 'to_csv'

As a test, Here is an example of the image viewing function I would use if I needed to check out the numbers:

In [57]:
data = [
    go.Heatmap(
        z=addnoise(np.rot90(X_train[0].transpose()))
    )
]

layout = go.Layout(
    height = 300,
    width = 300,
    autosize = False,
    showlegend = False
)

fig = go.Figure(data = data, layout = layout)

py.iplot(fig, filename='mnist1')

Next, I'm including a function that adds noise to an image. To do this, I'm creating a random matrix, which is a collection of 0's, 1 positve standard deviation, and 1 negative standard deviation. This matrix is basically a random on-off switch, marking which points I'd like to add noise to. Then, I have a second random matrix where I sample from the uniform distribution. This is a random magnitude element. I multiply these two to get my noise, and add the noise to the image matrix.

In [56]:
def addnoise(imagemat):
    mean = np.reshape(imagemat, 784).mean()
    std = np.reshape(imagemat, 784).std()
    randmat = np.random.choice([0,std,-std], size = 784, replace = True, p=[0.75, 0.125, 0.125]).reshape(28, 28)
    unimat = np.random.uniform(0,1,784).reshape(28, 28)
    return imagemat + randmat*unimat

noisyX_train = []
for image in X_train:
    noisyX_train.append(addnoise(image).reshape(784))
noisyX_train = np.array(noisyX_train)
#noisyX_train /= 255

noisyX_test = []
for image in X_test:
    noisyX_test.append(addnoise(image).reshape(784))
noisyX_test = np.array(noisyX_test)
#noisyX_test /= 255

Next I'm going to create my model. In this case I'm using the same model from the sample code.

In [169]:
model1 = Sequential()
model1.add(Dense(500, input_dim=784))
model1.add(Activation('relu'))
model1.add(Dropout(0.4))
model1.add(Dense(300))
model1.add(Activation('relu'))
model1.add(Dropout(0.4))
model1.add(Dense(10))
model1.add(Activation('softmax'))

rms = RMSprop()
model1.compile(loss='categorical_crossentropy', optimizer=rms,
              metrics=['accuracy'])

I'm also going to use bagging in an attempt to improve my model's performance. This bagging function goes through an image group and selects random images for a test set. I could use this function to create as many test sets as I want for an ensemble model.

In [180]:
def bagit(images, y_train):
    newx = []
    newy = []
    for i in np.arange(0,60000):
        j = np.random.choice(np.arange(0,60000), replace = True)
        newx.append(images[j,:])
        newy.append(y_train[j,:])
    newx = np.array(newx)
    newy = np.array(newy)
    print 'going'
    return (newx,newy)

X_1,y_1 = bagit(noisyX_train, y_train)
X_2,y_2 = bagit(noisyX_train, y_train)
X_3,y_3 = bagit(noisyX_train, y_train)
X_4,y_4 = bagit(noisyX_train, y_train)
X_5,y_5 = bagit(noisyX_train, y_train)
X_6,y_6 = bagit(noisyX_train, y_train)
X_7,y_7 = bagit(noisyX_train, y_train)
X_8,y_8 = bagit(noisyX_train, y_train)
X_9,y_9 = bagit(noisyX_train, y_train)

going
going
going
going
going
going
going
going
going


Next, I'll use similar code to the above to fit my models. After calculating each fit, I'll use the model to create a y_pred variable that will be added to the ensemble. Right now I only have this including two models (these take a long time to run), but using this method I could easily get 9 y_preds to match my random bagged training sets above.

In [181]:
history = LossHistory()
epochs=20
batch=256


model1.fit(X_1, y_1, nb_epoch=epochs, batch_size=batch,
          callbacks=[history],
          validation_data=(noisyX_test, y_test), verbose=2)
y_pred1 = model1.predict(noisyX_test, verbose = False)


model1.fit(X_2, y_2, nb_epoch=epochs, batch_size=batch,
          callbacks=[history],
          validation_data=(noisyX_test, y_test), verbose=2)
y_pred2 = model1.predict(noisyX_test, verbose = False)

Train on 60000 samples, validate on 10000 samples
Epoch 1/20
38s - loss: 0.1115 - acc: 0.9711 - val_loss: 0.1302 - val_acc: 0.9707
Epoch 2/20
15s - loss: 0.0842 - acc: 0.9768 - val_loss: 0.1250 - val_acc: 0.9712
Epoch 3/20
15s - loss: 0.0694 - acc: 0.9804 - val_loss: 0.1391 - val_acc: 0.9693
Epoch 4/20
15s - loss: 0.0604 - acc: 0.9828 - val_loss: 0.1401 - val_acc: 0.9711
Epoch 5/20
15s - loss: 0.0517 - acc: 0.9849 - val_loss: 0.1321 - val_acc: 0.9734
Epoch 6/20
15s - loss: 0.0462 - acc: 0.9870 - val_loss: 0.1442 - val_acc: 0.9708
Epoch 7/20
15s - loss: 0.0440 - acc: 0.9876 - val_loss: 0.1451 - val_acc: 0.9723
Epoch 8/20
15s - loss: 0.0382 - acc: 0.9890 - val_loss: 0.1431 - val_acc: 0.9734
Epoch 9/20
15s - loss: 0.0367 - acc: 0.9897 - val_loss: 0.1445 - val_acc: 0.9735
Epoch 10/20
15s - loss: 0.0323 - acc: 0.9909 - val_loss: 0.1589 - val_acc: 0.9734
Epoch 11/20
15s - loss: 0.0332 - acc: 0.9912 - val_loss: 0.1585 - val_acc: 0.9735
Epoch 12/20
16s - loss: 0.0317 - acc: 0.9913 - val_loss: 

I'm defining a helper function called "arraytonumber". The y_test data is in the form of 0 or 1 matrices, with a single '1' in the position corresponding to what the digit is. The result of my models are in a similar form, only these are probabilities. This will come in handy later.

In [None]:
def arraytonumber(arr):
    check = 0
    index = 0
    for i in np.arange(0,10):
        if arr[i] > check:
            index = i
            check = arr[i]
    return index

Now I could have my ensembles "vote" on what the prediction would be. To do this, I simply take the mean of each of the probabilities to come up with a single probability prediction.

In [182]:
y_pred = np.array([y_pred1, y_pred2]).mean(axis = 0)

y_pred_c = np.apply_along_axis(arraytonumber, 1, y_pred)

y_test_c = np.apply_along_axis(arraytonumber, 1, y_test)

Now, lets see how I did. I'm using an ensemble strategy, so I can't use Keras' built in functions to check my models' health. I'll have to manually calculate my accuracy.

In [None]:
comp = (y_pred_c == y_test_c)
vint = np.vectorize(int)
vcomp = vint(comp)

float(vcomp.sum()) / float(len(vcomp))

So far with two models I ended up at 97.5%. This is still not beating what Prof Rowe came up with using a single neural net without noise, so I'll have to keep on trying to beef up my accuracy...