# MNIST Neural Network using Keras and Sckit Learn

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as pp
%matplotlib inline

In [2]:
df_train = pd.read_csv('train.csv', dtype=np.int32)

In [3]:
X = df_train.iloc[:,1:].values
y = df_train.iloc[:,0].values

In [None]:
# Visualize some random data
idx = np.random.randint(low=0, high =X.shape[0], size=16)
sel = y[idx]
sel

In [None]:
sel = X[idx, :]
sel

In [None]:
def displayData(X):
    width=int(np.round(np.sqrt(X.shape[1])))
    (m,n) = X.shape
    height = int(n/width)
    
    display_rows = int(np.floor(np.sqrt(m)))
    display_cols = int(np.ceil(m/display_rows))
    
    fig, axes = pp.subplots( nrows=display_rows, ncols=display_cols, figsize=(20,10))
    pp.subplots_adjust(hspace = 0.01, wspace=0.01)
    k = 0
    for i in range(display_rows):
        for j in range(display_cols):
            axes[i,j].imshow(X[k].reshape(height, width), cmap='gray')
            axes[i,j].set_xticks([])
            axes[i,j].set_yticks([])
            axes[i,j].set_xticklabels([])
            axes[i,j].set_yticklabels([])
            k = k + 1

In [None]:
displayData(sel)

In [4]:
# First try with a normal ANN (without data augumentation and convolution or regularization)
from sklearn.model_selection import train_test_split

In [5]:
X_train, X_cv, y_train, y_cv = train_test_split(X, y, test_size=0.3)

In [6]:
# normalize inputs from 0-255 to 0-1
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaler.fit(X_train)



MinMaxScaler(copy=True, feature_range=(0, 1))

In [7]:
X_train = scaler.transform(X_train)
X_cv = scaler.transform(X_cv)

In [8]:
# For Multi class classification we need to one hot encode the labels (outputs)
from keras.utils import np_utils

Using TensorFlow backend.


In [9]:
y_train = np_utils.to_categorical(y_train)
y_cv = np_utils.to_categorical(y_cv)

In [10]:
# target number of classes
num_classes = y_train.shape[1]

# num of pixels
num_pixels = X_train.shape[1]

# num_samples
num_train = X_train.shape[0]
num_cv = X_cv.shape[0]

In [11]:
# Assuming a square image
width = np.sqrt(num_pixels).astype(np.int32)
height = int(num_pixels/width)

# reshape our inputs for image augumentation and convolutions
# format (num_samples, )
X_train = X_train.reshape((num_train, 1, height, width))
X_cv = X_cv.reshape((num_cv, 1, height, width))

In [12]:
# Data Augumentation using Image Generator
from keras.preprocessing.image import ImageDataGenerator

In [14]:
datagen = ImageDataGenerator(
    featurewise_center=False,
    featurewise_std_normalization=False,
    shear_range=0.2,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    data_format='channels_first')

In [15]:
# Fit our generator to training set
datagen.fit(X_train, augment=False)

In [16]:
# import the required classes for our model
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D

In [17]:
X_train.shape

(29400, 1, 28, 28)

<b>The network topology</b>
1. Convolutional layer with 30 feature maps of size 5×5.
2. Pooling layer taking the max over 2*2 patches.
3. Convolutional layer with 15 feature maps of size 3×3.
4. Pooling layer taking the max over 2*2 patches.
5. Dropout layer with a probability of 20%.
6. Flatten layer.
7. Fully connected layer with 128 neurons and rectifier activation.
8. Fully connected layer with 50 neurons and rectifier activation.
9. Output layer.

In [18]:
def createDeepModel():
    model = Sequential()
    model.add(Conv2D(32,(5,5), strides=(1,1),activation='relu', data_format='channels_first', input_shape=(1, height, width)))
    model.add(MaxPooling2D(pool_size=(2,2), strides=(1,1)))
    #model.add(Conv2D(16, (3,3), strides=(1,1), activation='relu', data_format='channels_first'))
    #model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(128, activation='relu',kernel_initializer='normal'))
    #model.add(Dense(64, activation='relu', kernel_initializer='normal'))
    model.add(Dense(num_classes, activation='softmax', kernel_initializer='normal'))
    
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [19]:
# fit our model to our image generator
model= createDeepModel()

In [22]:
# Try without Augumentation first
model.fit(X_train, y_train, batch_size=1000, epochs=10, verbose=2, validation_data=(X_cv, y_cv), shuffle=False)

Train on 29400 samples, validate on 12600 samples
Epoch 1/10
 - 73s - loss: 0.7109 - acc: 0.7985 - val_loss: 0.2624 - val_acc: 0.9262
Epoch 2/10
 - 72s - loss: 0.2068 - acc: 0.9410 - val_loss: 0.1578 - val_acc: 0.9563
Epoch 3/10
 - 72s - loss: 0.1294 - acc: 0.9638 - val_loss: 0.1186 - val_acc: 0.9677
Epoch 4/10
 - 72s - loss: 0.0925 - acc: 0.9732 - val_loss: 0.0975 - val_acc: 0.9721
Epoch 5/10
 - 73s - loss: 0.0716 - acc: 0.9804 - val_loss: 0.0859 - val_acc: 0.9755
Epoch 6/10
 - 72s - loss: 0.0581 - acc: 0.9837 - val_loss: 0.0763 - val_acc: 0.9779
Epoch 7/10
 - 72s - loss: 0.0487 - acc: 0.9866 - val_loss: 0.0749 - val_acc: 0.9777
Epoch 8/10
 - 72s - loss: 0.0421 - acc: 0.9880 - val_loss: 0.0730 - val_acc: 0.9793
Epoch 9/10
 - 72s - loss: 0.0366 - acc: 0.9891 - val_loss: 0.0708 - val_acc: 0.9795
Epoch 10/10
 - 72s - loss: 0.0318 - acc: 0.9909 - val_loss: 0.0676 - val_acc: 0.9810


<keras.callbacks.History at 0x24e49730a58>

In [None]:
#batch_size = 100
#model.fit_generator(datagen.flow(X_train, y_train, batch_size=1000), \
                    steps_per_epoch= 3000, epochs=10, verbose=2, validation_data=(X_cv, y_cv))