# Kaggle Digit Recognition Problem
## CNN

### Import all necessary libraries and modules

In [1]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.utils import np_utils
from keras import backend as K
# Use channels-first image tensors, i.e. each sample is laid out as (channels, rows, cols).
# set_image_data_format is the Keras 2 name for this setting; the legacy
# set_image_dim_ordering('th') alias is no longer available in later Keras 2 releases.
K.set_image_data_format('channels_first')

# Seed NumPy's global random generator so repeated runs start from the same state.
# NOTE(review): the TensorFlow backend keeps its own RNG state, which is not seeded
# here - confirm whether stricter reproducibility is required.
seed = 7
np.random.seed(seed)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


### Import data, split into inputs and outputs, normalize, and reshape each 1D pixel vector into a 3D (1, 28, 28) array

In [2]:
# Read both CSV files from the working directory.
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')

# Column 0 of the training frame is used as the target, stored as int32.
Y_train = train_data.iloc[:, 0].values.astype('int32')

# Remaining training columns (and every test column) are the inputs:
# cast to float32, divide by 255 (presumably mapping 0-255 pixel intensities
# into [0, 1]), then turn each flat row into a (1, 28, 28) array.
X_train = (
    train_data.iloc[:, 1:].values.astype('float32') / 255
).reshape(len(train_data), 1, 28, 28)
X_test = (
    test_data.values.astype('float32') / 255
).reshape(len(test_data), 1, 28, 28)

### One hot encode the output variable

In [3]:
# One-hot encode the integer labels. The width is fixed at 10 rather than inferred
# from the largest label present, so the targets always line up with the 10-unit
# softmax output layer of the model.
Y_train = np_utils.to_categorical(Y_train, num_classes=10)

In [4]:
# Sanity check on the one-hot encoded targets: one row per sample, one column per class.
print(Y_train.shape)

(42000, 10)


### Define baseline model (Convolutional neural network)

In [5]:
def baseline_model(input_shape=(1, 28, 28), num_classes=10):
    """Build and compile a small convolutional classifier.

    Layers, in order: a 5x5 convolution with 32 filters (ReLU), 2x2 max pooling,
    20% dropout, flatten, a 128-unit ReLU dense layer and a softmax output layer.

    Args:
        input_shape: Shape of one input sample, handed to the first layer.
            The default (1, 28, 28) is a channels-first layout, so it depends on
            the Keras backend being configured for channels-first images.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A Sequential model compiled with categorical cross-entropy loss, the
        Adam optimizer and accuracy as the reported metric.
    """
    model = Sequential([
        Conv2D(32, kernel_size=(5, 5), input_shape=input_shape, activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.2),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(num_classes, activation='softmax'),
    ])
    model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return model

### Fit the model on the training data and evaluate it on the same training data

In [6]:
# Build a freshly initialised, compiled network.
model = baseline_model()

# Train on the whole training set: 10 epochs, mini-batches of 200 samples,
# verbose=2 prints one summary line per epoch.
model.fit(x=X_train, y=Y_train, batch_size=200, epochs=10, verbose=2)

Epoch 1/10
 - 45s - loss: 0.2767 - acc: 0.9200
Epoch 2/10
 - 44s - loss: 0.0848 - acc: 0.9747
Epoch 3/10
 - 44s - loss: 0.0581 - acc: 0.9827
Epoch 4/10
 - 45s - loss: 0.0458 - acc: 0.9856
Epoch 5/10
 - 44s - loss: 0.0368 - acc: 0.9886
Epoch 6/10
 - 44s - loss: 0.0308 - acc: 0.9903
Epoch 7/10
 - 45s - loss: 0.0262 - acc: 0.9919
Epoch 8/10
 - 46s - loss: 0.0221 - acc: 0.9928
Epoch 9/10
 - 45s - loss: 0.0182 - acc: 0.9944
Epoch 10/10
 - 45s - loss: 0.0155 - acc: 0.9950
CNN Error: 0.20%


In [9]:
# Score the fitted model on the same data it was trained on, so the figure below
# is training accuracy rather than a held-out estimate.
scores = model.evaluate(X_train, Y_train, verbose=0)

# Index 1 of the returned scores is read as the accuracy metric.
# Error-rate alternative: print("CNN Error: %.2f%%" % (100-scores[1]*100))
accuracy = scores[1]
print('Accuracy:', accuracy)

Accuracy: 0.9980238095238095


### Make predictions on our test data and save to .csv format

In [7]:
# Predict a digit for every test image. Taking the argmax over the softmax outputs
# yields the class index, the same result Sequential.predict_classes gives for a
# softmax output, without depending on that helper being available.
preds = np.argmax(model.predict(X_test, verbose=0), axis=1)
print(preds.shape)
print(preds)

# Write the submission with a 1-based ImageId column and a Label column, and
# without pandas' own index column, which is the layout the competition expects.
submission = pd.DataFrame(
    {'ImageId': np.arange(1, len(preds) + 1), 'Label': preds},
    columns=['ImageId', 'Label'],
)
submission.to_csv('cnn-submission.csv', index=False)

(28000,)
[2 0 9 ... 3 9 2]


### Our predictions score 98.8% on Kaggle