In [32]:
from keras.models import Sequential
import numpy as np
import pandas as pd
from keras.utils import np_utils

## Load the MNIST dataset
Our neural network takes a single vector for each training example, so we need to reshape the input so that each 28x28 image becomes a single 784-dimensional vector. We'll also scale the inputs to be in the range [0, 1] rather than [0, 255].

In [11]:
from keras.datasets import mnist

# load_data() hands back a (train, test) pair; each half is unpacked into its
# images (X_*) and labels (y_*).
train_split, test_split = mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.pkl.gz


In [29]:
# Display the dimensions of the test-image array.
X_test.shape

(10000, 28, 28)

In [30]:
# Format the data for training.
# Each image is flattened into a single row vector and its pixel values are
# divided by 255 so they land in [0, 1].
#
# The sample count is read from the array itself instead of being hard-coded
# (60000 / 10000), and the `ndim` guard makes this cell safe to re-run: an
# array that has already been flattened is skipped, so its pixels are never
# divided by 255 a second time.
if X_train.ndim > 2:
    X_train = X_train.reshape(len(X_train), -1).astype('float32') / 255
if X_test.ndim > 2:
    X_test = X_test.reshape(len(X_test), -1).astype('float32') / 255
print("Training matrix shape", X_train.shape)
print("Testing matrix shape", X_test.shape)

Training matrix shape (60000, 784)
Testing matrix shape (10000, 784)


Convert the integer class labels into one-hot encoded target matrices.

In [42]:
# One-hot encode both label vectors using the same number of classes.
nb_classes = 10
Y_train, Y_test = (
    np_utils.to_categorical(labels, nb_classes)
    for labels in (y_train, y_test)
)

## Build the neural network

In [22]:
# Start from an empty Sequential container; layers are appended with model.add().
model = Sequential()

In [23]:
from keras.layers import Dense, Activation

# Create the model in the same cell that adds its layers. Previously this cell
# only appended to a `model` built in another cell, so running it twice stacked
# a second copy of every layer onto the same network. The later model2/model3
# cells already construct and populate their model in a single cell.
model = Sequential()
# first hidden layer: 784 inputs (flattened 28x28 image) -> 500 units
model.add(Dense(output_dim=500,
                input_dim=28*28))
model.add(Activation('sigmoid'))
# second hidden layer: 500 units
model.add(Dense(output_dim=500))
model.add(Activation('sigmoid'))
# output layer: 10 units with softmax
model.add(Dense(output_dim=10))
model.add(Activation('softmax'))

In [33]:
from keras.optimizers import SGD

# SGD with learning rate 0.1, 'mse' as the loss, and accuracy tracked as an
# extra metric.
sgd = SGD(lr=0.1)
model.compile(optimizer=sgd, loss='mse', metrics=['accuracy'])

In [38]:
# Train for 20 epochs with a batch size of 100.
model.fit(
    X_train,
    Y_train,
    batch_size=100,
    nb_epoch=20,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x29b825534a8>

In [39]:
# Evaluate on the test set; the first two entries of the result are reported
# as the loss and the accuracy.
score = model.evaluate(X_test, Y_test)
for label, value in zip(('Total loss', 'Accuracy'), score):
    print(label, value)

Accuracy 0.7011


In [40]:
# Run the first model on the test inputs and keep its outputs in `result`.
# NOTE(review): `result` is not read by any later cell visible here.
result = model.predict(X_test)

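## Choosing a Proper Loss

The first model was trained with mean squared error (`mse`). Here the same architecture is trained with categorical cross-entropy instead, which is better matched to a softmax output for classification.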


In [44]:
# Second model: sigmoid hidden layers and a softmax output, compiled with
# categorical cross-entropy.
model2 = Sequential()
model2_layers = [
    # first hidden layer
    Dense(output_dim=500, input_dim=28*28),
    Activation('sigmoid'),
    # second hidden layer
    Dense(output_dim=500),
    Activation('sigmoid'),
    # output layer
    Dense(output_dim=10),
    Activation('softmax'),
]
for layer in model2_layers:
    model2.add(layer)

model2.compile(optimizer=SGD(lr=0.1),
               loss='categorical_crossentropy',
               metrics=['accuracy'])

In [45]:
# Train the second model for 20 epochs with a batch size of 100.
model2.fit(
    X_train,
    Y_train,
    batch_size=100,
    nb_epoch=20,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x29b821da668>

In [46]:
# Evaluate the second model on the test set and report its first two scores
# as the loss and the accuracy.
score2 = model2.evaluate(X_test, Y_test)
for label, value in zip(('Total loss', 'Accuracy'), score2):
    print(label, value)

Accuracy 0.9406


## Use ReLU for activation
1. Fast to compute
2. Biologically motivated
3. Equivalent to an infinite sum of sigmoids with different biases
4. Mitigates the vanishing gradient problem

In [51]:
# Third model: ReLU hidden layers and a softmax output, compiled with
# categorical cross-entropy.
model3 = Sequential()
# first hidden layer (the only one that declares the input size)
model3.add(Dense(output_dim=500, input_dim=28*28))
model3.add(Activation('relu'))
# second hidden layer, followed by the output layer
for width, activation in ((500, 'relu'), (10, 'softmax')):
    model3.add(Dense(output_dim=width))
    model3.add(Activation(activation))

model3.compile(optimizer=SGD(lr=0.1),
               loss='categorical_crossentropy',
               metrics=['accuracy'])

In [52]:
# Train the third model for 20 epochs with a batch size of 100.
model3.fit(
    X_train,
    Y_train,
    batch_size=100,
    nb_epoch=20,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x29b8460c8d0>

In [53]:
# Evaluate the third model on the test set and report its first two scores
# as the loss and the accuracy.
score3 = model3.evaluate(X_test, Y_test)
for label, value in zip(('Total loss', 'Accuracy'), score3):
    print(label, value)

Accuracy 0.9806
