<h2>Project: Digits Recognizer</h2>

In [26]:
# Package Imports
import numpy as np
import pandas as pd
import csv as csv

from sklearn.cross_validation import train_test_split
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Flatten
from keras.layers import Convolution2D, MaxPooling2D

# Seed NumPy's global random generator so repeated runs draw the same numbers
SEED = 123
np.random.seed(SEED)

In [4]:
# Read the labelled training set from disk and preview its first rows
train_csv_path = 'train.csv'
data = pd.read_csv(train_csv_path)
data.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
# Separate the 'label' column (target) from the remaining columns (features)
label_column = 'label'
features = data.drop(label_column, axis=1)
target = data[label_column]

In [6]:
def reshapeFeatures(features):
    """Reshape a table of flattened images into a 4-D image array.

    Parameters
    ----------
    features : pandas.DataFrame
        One row per image; every row must hold exactly 28 * 28 = 784 values.

    Returns
    -------
    numpy.ndarray
        Array of shape (n_rows, 28, 28, 1): 28x28 pixels plus a single
        trailing channel axis. The dtype is whatever the frame's values have.

    Raises
    ------
    ValueError
        Raised by NumPy's reshape when the rows do not hold 784 values each.
    """
    # `.values` is used instead of `DataFrame.as_matrix()`: as_matrix was
    # deprecated and later removed from pandas, while `.values` is available
    # on old and new releases alike and returns the same ndarray.
    pixel_rows = features.values
    return pixel_rows.reshape(pixel_rows.shape[0], 28, 28, 1)

In [7]:
def reshapeTarget(target):
    """One-hot encode integer class labels into 10 class columns.

    Parameters
    ----------
    target : pandas.Series
        Integer labels; they must lie in 0..9 because the number of classes
        passed to to_categorical is fixed at 10.

    Returns
    -------
    numpy.ndarray
        Array of shape (n_labels, 10) with a 1 in each row's label column.
    """
    # Pass the flat 1-D label vector straight through. Reshaping it to (1, n)
    # first only works on Keras releases whose to_categorical flattens its
    # input; releases that preserve the input's leading dimensions would
    # return (1, n, 10), which no longer lines up with the feature rows.
    return np_utils.to_categorical(target.values, 10)

In [10]:
# Split the data into training and testing sets, holding out 10% for testing.
# (An earlier note here said 20%, but test_size has been 0.1 in the code.)
# random_state pins the shuffle so the same rows land in each split on re-run.
TEST_FRACTION = 0.1
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=TEST_FRACTION, random_state=42)

In [11]:
# Preprocess input data: reshape each split into image arrays, cast to
# float32, then divide every value by 255
X_train = reshapeFeatures(X_train).astype('float32') / 255
X_test = reshapeFeatures(X_test).astype('float32') / 255

In [12]:
# One-hot encode the class labels of the training and testing splits
Y_train, Y_test = map(reshapeTarget, (y_train, y_test))

In [13]:
# Define model architecture as a single ordered list of layers
model = Sequential([
    # Convolutional stage: two 32-filter conv layers on 28x28x1 input,
    # followed by 2x2 max-pooling and 25% dropout
    Convolution2D(32, 3, 3, activation='relu', input_shape=(28, 28, 1)),
    Convolution2D(32, 3, 3, activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    # Classifier stage: flatten, 128-unit dense layer with 50% dropout,
    # then a 10-way softmax output
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])

In [14]:
# Compile model: Adam optimizer, categorical cross-entropy loss, and
# accuracy reported as an additional metric
compile_options = dict(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.compile(**compile_options)

In [15]:
# Train on the training split: 10 epochs, mini-batches of 32, with progress
# output enabled. The call stays the cell's final expression so its return
# value is still displayed.
model.fit(
    X_train,
    Y_train,
    batch_size=32,
    nb_epoch=10,
    verbose=1
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x11289f9d0>

In [16]:
# Evaluate model on the held-out test split. The recorded output of this cell
# is a two-element list; given the compile settings (loss plus the 'accuracy'
# metric) that is presumably [loss, accuracy].
score = model.evaluate(X_test, Y_test, verbose=0)
# Parenthesised print works as a statement on Python 2 and as a function call
# on Python 3, and prints the same text for a single argument.
print(score)

[0.031354287583557641, 0.98928571428571432]


In [17]:
# Read the test set from disk and preview its first rows
test_csv_path = 'test.csv'
features_test = pd.read_csv(test_csv_path)
features_test.head()

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [18]:
# Preprocess test input data the same way as the training inputs:
# reshape into image arrays, cast to float32, then divide by 255
features_test = reshapeFeatures(features_test).astype('float32') / 255

In [22]:
# Predict a class index for every test image and show the first ten
prediction = model.predict_classes(features_test, batch_size=32, verbose=0)
# Parenthesised print works as a statement on Python 2 and as a function call
# on Python 3, and prints the same text for a single argument.
print(prediction[:10])

[2 0 9 0 3 7 0 3 0 3]


In [27]:
# Write our predictions in myDigitsRecognizer.csv file: a header row followed
# by one (ImageId, Label) row per prediction.

# ImageId is 1-based and sized from the predictions themselves. A hard-coded
# upper bound would make zip() silently drop rows whenever the number of
# predictions differed from it.
ImageId = range(1, len(prediction) + 1)

# NOTE: 'wb' is the mode the csv module expects on Python 2, which this
# notebook targets; on Python 3 the file would need open(..., 'w', newline='').
# The with-block closes the file even if a write raises.
with open('myDigitsRecognizer.csv', 'wb') as predictions_file:
    submission_writer = csv.writer(predictions_file)
    submission_writer.writerow(['ImageId', 'Label'])
    submission_writer.writerows(zip(ImageId, prediction))

print('done')

done
