# MNIST Neural Network using Keras and Scikit-Learn

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as pp
%matplotlib inline

In [2]:
# Load the training table from train.csv, parsing every column as a 32-bit integer.
df_train = pd.read_csv('train.csv', dtype=np.int32)

In [3]:
# Column 0 is taken as the label vector; all remaining columns form the feature matrix.
y, X = (df_train.iloc[:, 0].values, df_train.iloc[:, 1:].values)

In [None]:
# Visualize some random data
# Draw 16 row positions uniformly from [0, number of rows); repeats are possible.
idx = np.random.randint(0, len(X), size=16)
# Labels of the sampled rows, echoed as the cell output.
sel = y[idx]
sel

In [None]:
# Feature rows for the same sampled positions (this rebinds `sel` from labels to pixels).
sel = np.take(X, idx, axis=0)
sel

In [None]:
def displayData(X):
    """Show every row of ``X`` as a grayscale image in a near-square grid.

    Parameters
    ----------
    X : 2-D array of shape (m, n)
        One flattened image per row. Each row is reshaped to
        ``(height, width)`` with ``width = round(sqrt(n))`` and
        ``height = int(n / width)``, so ``n`` must equal ``height * width``.

    Notes
    -----
    The grid has ``floor(sqrt(m))`` rows and ``ceil(m / rows)`` columns.
    When that yields more cells than images, the trailing cells are left
    blank. Nothing is drawn (and nothing is raised) for ``m == 0``.
    """
    (m, n) = X.shape
    if m == 0:
        # No images: avoid a zero-row grid and the division by zero below.
        return

    width = int(np.round(np.sqrt(n)))
    height = int(n / width)

    display_rows = int(np.floor(np.sqrt(m)))
    display_cols = int(np.ceil(m / display_rows))

    # squeeze=False keeps `axes` two-dimensional even when the grid has a
    # single row or column (m = 1, 2 or 3), where the default would return a
    # bare Axes or a 1-D array.
    fig, axes = pp.subplots(nrows=display_rows, ncols=display_cols,
                            figsize=(20, 10), squeeze=False)
    pp.subplots_adjust(hspace=0.01, wspace=0.01)

    # ravel() walks the grid row by row, matching the image order in X.
    for k, ax in enumerate(axes.ravel()):
        if k >= m:
            # rows * cols can exceed m (e.g. m = 5 -> 2 x 3); hide the spare cell.
            ax.axis('off')
            continue
        ax.imshow(X[k].reshape(height, width), cmap='gray')
        ax.set_xticks([])
        ax.set_yticks([])

In [None]:
# Plot the rows currently held in `sel` as a grid of images.
displayData(sel)

In [4]:
# First try with a normal ANN (without data augmentation and convolution or regularization)
from sklearn.model_selection import train_test_split

In [5]:
# Hold out 30% of the rows for validation. The fixed seed makes the split (and
# every score computed from it) repeatable across kernel restarts; stratifying
# on y keeps the label proportions the same in both parts.
X_train, X_cv, y_train, y_cv = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

In [6]:
# normalize inputs from 0-255 to 0-1
from sklearn.preprocessing import MinMaxScaler
# Fit the scaler on the training split only; fit() returns the scaler itself.
scaler = MinMaxScaler().fit(X_train)
# Echo the fitted scaler as the cell output.
scaler



MinMaxScaler(copy=True, feature_range=(0, 1))

In [7]:
# Apply the scaling learned from the training split to both splits.
# Both names are rebound, so re-running this cell would scale the data a second time.
X_train, X_cv = (scaler.transform(split) for split in (X_train, X_cv))

In [8]:
# For Multi class classification we need to one hot encode the labels (outputs)
from keras.utils import np_utils

Using TensorFlow backend.


In [9]:
# One-hot encode both label vectors against the same class count. Left to
# itself, to_categorical sizes its output from the largest label it is given,
# so a split that happens to miss the highest label would come out narrower
# than the other. `y` still holds the full, un-encoded label vector.
n_label_values = int(y.max()) + 1
y_train = np_utils.to_categorical(y_train, num_classes=n_label_values)
y_cv = np_utils.to_categorical(y_cv, num_classes=n_label_values)

In [10]:
# Target number of classes, read from the width of the one-hot encoded label matrix.
num_classes = y_train.shape[1]

In [11]:
# import required keras classes for our keras model
from keras.models import Sequential
from keras.layers import Dense, Dropout

In [12]:
# Number of input features: one per column of X.
num_pixels = X.shape[1]
def createANNModel():
    """Build and compile the fully connected baseline classifier.

    Architecture, in order:
      * Dense hidden layer with ``num_pixels`` units and ReLU activation,
        taking ``num_pixels`` inputs.
      * Dropout with rate 0.2.
      * Dense softmax output layer with ``num_classes`` units.

    Both dense layers use the ``'normal'`` kernel initializer. The model is
    compiled with categorical cross-entropy loss, the Adam optimizer and
    accuracy as the reported metric.

    Reads the module-level ``num_pixels`` and ``num_classes``.

    Returns
    -------
    The compiled ``Sequential`` model (untrained).
    """
    layer_stack = [
        Dense(num_pixels, input_dim=num_pixels, kernel_initializer='normal', activation='relu'),
        Dropout(rate=0.2),
        Dense(num_classes, kernel_initializer='normal', activation='softmax'),
    ]
    network = Sequential(layer_stack)
    network.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network

In [None]:
# Train the dense baseline: 10 epochs in mini-batches of 100, reporting
# validation metrics on the held-out split after each epoch.
model = createANNModel()
model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_cv, y_cv),
    epochs=10,
    batch_size=100,
    verbose=2,
)


In [None]:
# evaluate() returns [loss, accuracy] for this model (accuracy is the only
# compiled metric); report the validation error as a percentage.
scores = model.evaluate(X_cv, y_cv, verbose=0)
error_pct = 100 - (scores[1] * 100)
print('Error :{0:0.2f} %'.format(error_pct))

In [13]:
# Use Convolution Neural Network
from keras.layers import Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D

In [14]:
# Reshape the flat pixel rows into image tensors for the convolutional model.
# Both side lengths are derived from the pixel count.
width = int(np.round(np.sqrt(num_pixels)))
height = int(np.round(num_pixels / width))

# The data was already scaled above, so only the layout changes here:
# (samples, channels, width, height)
# channel = 1 (grayscale), 3 (RGB), 4 (RGBA)
# NOTE(review): Keras documents channels_first input as (channels, rows, cols);
# the width/height order used here only coincides with that for square images
# (presumably 28x28 here) - confirm before reusing with non-square data.
X_train = np.reshape(X_train, (len(X_train), 1, width, height))
X_cv = np.reshape(X_cv, (len(X_cv), 1, width, height))

1. The first hidden layer is a convolutional layer called Conv2D (Convolution2D in older Keras versions). The layer has 32 feature maps, each with a 5×5 kernel, and a rectifier (ReLU) activation function. It is also the input layer, expecting images with the structure (channels, width, height).
2. Next we define a pooling layer that takes the max called MaxPooling2D. It is configured with a pool size of 2×2.
3. The next layer is a regularization layer using dropout called Dropout. It is configured to randomly exclude 20% of neurons in the layer in order to reduce overfitting.
4. Next is a layer that converts the 2D matrix data to a vector called Flatten. It allows the output to be processed by standard fully connected layers.
5. Next is a fully connected layer with 128 neurons and a rectifier activation function.
6. Finally, the output layer has 10 neurons for the 10 classes and a softmax activation function to output probability-like predictions for each class.

In [17]:
def createCNNModel():
    """Build and compile the convolutional classifier.

    Architecture, in order:
      * Conv2D: 32 filters, 5x5 kernel, stride 1, ReLU, channels-first input
        of shape ``(1, width, height)``.
      * MaxPooling2D: 2x2 pooling, channels-first.
      * Dropout with rate 0.2.
      * Flatten.
      * Dense hidden layer with 128 units and ReLU activation.
      * Dense softmax output layer with ``num_classes`` units.

    The model is compiled with categorical cross-entropy loss, the Adam
    optimizer and accuracy as the reported metric.

    Reads the module-level ``width``, ``height`` and ``num_classes``.

    Returns
    -------
    The compiled ``Sequential`` model (untrained).
    """
    model = Sequential()
    model.add(Conv2D(32, (5,5), strides=(1,1), data_format='channels_first', activation='relu', input_shape=(1, width, height)))
    # The pooling layer must be told the layout too. Without data_format it
    # falls back to the global Keras setting (channels_last unless configured
    # otherwise) and would treat the feature-map axis as a spatial axis,
    # pooling across filters instead of across the image.
    model.add(MaxPooling2D(pool_size=(2,2), data_format='channels_first'))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
    

In [18]:
# Train the convolutional model: 20 epochs in mini-batches of 1000, reporting
# validation metrics on the held-out split after each epoch.
model = createCNNModel()
model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_cv, y_cv),
    epochs=20,
    batch_size=1000,
    verbose=2,
)

Train on 29400 samples, validate on 12600 samples
Epoch 1/20
 - 54s - loss: 0.8068 - acc: 0.7752 - val_loss: 0.3354 - val_acc: 0.8995
Epoch 2/20
 - 53s - loss: 0.2896 - acc: 0.9160 - val_loss: 0.2406 - val_acc: 0.9298
Epoch 3/20
 - 54s - loss: 0.2078 - acc: 0.9412 - val_loss: 0.1771 - val_acc: 0.9483
Epoch 4/20
 - 54s - loss: 0.1526 - acc: 0.9558 - val_loss: 0.1366 - val_acc: 0.9613
Epoch 5/20
 - 53s - loss: 0.1170 - acc: 0.9676 - val_loss: 0.1191 - val_acc: 0.9656
Epoch 6/20
 - 54s - loss: 0.0967 - acc: 0.9716 - val_loss: 0.0935 - val_acc: 0.9744
Epoch 7/20
 - 53s - loss: 0.0811 - acc: 0.9763 - val_loss: 0.0832 - val_acc: 0.9765
Epoch 8/20
 - 53s - loss: 0.0681 - acc: 0.9809 - val_loss: 0.0776 - val_acc: 0.9788
Epoch 9/20
 - 53s - loss: 0.0614 - acc: 0.9826 - val_loss: 0.0712 - val_acc: 0.9800
Epoch 10/20
 - 53s - loss: 0.0556 - acc: 0.9844 - val_loss: 0.0670 - val_acc: 0.9817
Epoch 11/20
 - 53s - loss: 0.0491 - acc: 0.9854 - val_loss: 0.0638 - val_acc: 0.9826
Epoch 12/20
 - 55s - los

<keras.callbacks.History at 0x17a94001470>

In [None]:
# evaluate() returns [loss, accuracy] for this model (accuracy is the only
# compiled metric); report the validation error as a percentage.
scores = model.evaluate(X_cv, y_cv, verbose=0)
error_pct = 100 - (scores[1] * 100)
print('Error :{0:0.2f} %'.format(error_pct))

In [None]:
# Save the trained CNN to Mnist_cnn.h5 so it can be reloaded later without retraining.
model.save('Mnist_cnn.h5')

In [None]:
# Load our previously saved model
from keras.models import load_model

In [None]:
# Rebind `model` to the network restored from Mnist_cnn.h5 (the file written by model.save).
model = load_model('Mnist_cnn.h5')

In [None]:
# Read the test table as 32-bit integers. Every column is used as a feature;
# no label column is split off here.
df_test = pd.read_csv('test.csv', dtype=np.int32)
X_test = df_test.values

In [None]:
# Scale the test pixels with the scaler that was fitted on the training split,
# so they go through the same transform as the data the model was trained on.
# NOTE: X_test is rebound, so re-running this cell would scale it a second time.
X_test= scaler.transform(X_test)

In [None]:
# Give the test rows the same (samples, 1, width, height) layout used for the CNN training data.
X_test = np.reshape(X_test, (len(X_test), 1, width, height))

In [None]:
# Run the model on the test images; each row of y_pred holds one score per class.
y_pred = model.predict(X_test, verbose=0)

In [None]:
# Suppress scientific notation while printing and show two decimal places
np.set_printoptions(precision=2, suppress=True)

# Peek at the score rows for the first two test images.
print(y_pred[:2])

In [None]:
# The predicted label is the index of the highest score in each row.
predictions = y_pred.argmax(axis=1)
predictions[:5]

In [None]:
# Submission
# Two columns: a 1-based running ImageId per test row, and its predicted Label.
image_ids = np.arange(1, len(df_test) + 1)
submission = pd.DataFrame(
    data=np.column_stack((image_ids, predictions)),
    columns=['ImageId', 'Label'],
)

In [None]:
# Preview the first rows of the submission table.
submission.head()

In [None]:
# Write the submission to submission.csv without the DataFrame index column.
submission.to_csv('submission.csv', index= False)