In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_paths = (
    os.path.join(folder, file_name)
    for folder, _subdirs, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_path in input_paths:
    print(input_path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/digit-recognizer/train.csv
/kaggle/input/digit-recognizer/test.csv
/kaggle/input/digit-recognizer/sample_submission.csv


In [2]:
#import all useful libraries
#Data Processing libraries
import numpy as np
import pandas as pd
from keras.preprocessing.image import ImageDataGenerator               # used for data augmentation
#ML Libraries
import tensorflow as tf
import keras 
from sklearn.model_selection import train_test_split
from keras.layers import Dense, Conv2D, AveragePooling2D, Flatten, Dropout

Using TensorFlow backend.


In [3]:
# Load the training and test CSV files into pandas DataFrames.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/digit-recognizer/train.csv",
        "../input/digit-recognizer/test.csv",
    )
)

In [4]:
# Inspect the shape of each dataset (train first, then test).
for frame in (train, test):
    print(frame.shape)
# 784 = pixels of a 28x28 image
# 785 = pixels of a 28x28 image + the class (label) column
# The large dimension is the number of examples

(42000, 785)
(28000, 784)


In [5]:
# Separate the pixel columns (x) from the 'label' column (y).
x_train = train.drop(columns='label')
y_train = train['label']

# The combined frame is no longer needed; drop the name to free memory.
del train

# Display the labels: one class per training example.
y_train

0        1
1        0
2        1
3        4
4        0
        ..
41995    0
41996    1
41997    7
41998    6
41999    9
Name: label, Length: 42000, dtype: int64

In [6]:
# Scale pixel values to between 0 and 1 for faster learning.
x_train = x_train / 255

# Side length of the square images, derived from the number of pixel columns.
image_size = int(np.sqrt(x_train.shape[1]))

# Keras expects the channel axis last here: (height, width, channels).
ip_shape = (image_size, image_size, 1)
x_train = x_train.values.reshape(x_train.shape[0], image_size, image_size, 1)

# Convert y to one-hot vectors for training.
# Use the public `keras.utils.to_categorical` entry point: the private
# `keras.utils.np_utils` module path is not available in newer Keras releases,
# while the public name works in both old and new versions.
y_train = keras.utils.to_categorical(y_train.values, num_classes=10)

In [7]:
# Inspect the one-hot encoded labels: one row per example, one column per class.
y_train

array([[0., 1., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.]], dtype=float32)

In [8]:
# Report the shapes of the reshaped images and the one-hot labels.
for caption, tensor in (
    ('x_train.shape = ', x_train),
    ('y_train.shape = ', y_train),
):
    print(caption, tensor.shape)

# 42000 = number of examples
# 28    = image height and width in pixels
# 1     = number of channels (the image is black/white so it has only one channel)
# 10    = number of classes

x_train.shape =  (42000, 28, 28, 1)
y_train.shape =  (42000, 10)


In [9]:
# Apply the same preprocessing to the test data: scale by 255, then reshape
# to (examples, height, width, channels).
n_test_examples = test.shape[0]
test = (test / 255).values.reshape(n_test_examples, image_size, image_size, 1)

print(test.shape)

(28000, 28, 28, 1)


In [10]:
# Hold out 10000 training examples as a development set (fixed seed for a repeatable split).
split_parts = train_test_split(
    x_train,
    y_train,
    test_size=10000,
    random_state=12,
)
x_train, x_dev, y_train, y_dev = split_parts

In [11]:
# Print every array's shape to confirm the train/dev split produced the expected sizes.
for caption, tensor in (
    ('x_train.shape = ', x_train),
    ('y_train.shape = ', y_train),
    ('x_dev.shape   = ', x_dev),
    ('y_dev.shape   = ', y_dev),
):
    print(caption, tensor.shape)

x_train.shape =  (32000, 28, 28, 1)
y_train.shape =  (32000, 10)
x_dev.shape   =  (10000, 28, 28, 1)
y_dev.shape   =  (10000, 10)


In [12]:
# Build the Keras model: two conv + average-pooling stages, then three dense layers
# ending in a 10-way softmax.
model = keras.Sequential([
    Conv2D(filters=6, kernel_size=(3, 3), activation='relu', input_shape=ip_shape),
    AveragePooling2D(),
    Conv2D(filters=16, kernel_size=(3, 3), activation='relu'),
    AveragePooling2D(),
    Flatten(),
    Dense(units=120, activation='relu'),
    Dense(units=84, activation='relu'),
    Dense(units=10, activation='softmax'),
])
# Even though the original paper did not use ReLU, we use it here because it works better.
# ReLU was not used in the original paper because it was not widely known when the paper was written.

In [13]:
# Summary of the model describing its structure (layers, output shapes, parameter counts)
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 26, 26, 6)         60        
_________________________________________________________________
average_pooling2d_1 (Average (None, 13, 13, 6)         0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 11, 11, 16)        880       
_________________________________________________________________
average_pooling2d_2 (Average (None, 5, 5, 16)          0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 400)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 120)               48120     
_________________________________________________________________
dense_2 (Dense)              (None, 84)               

In [14]:
# Configure training: categorical cross-entropy loss, Adam with default settings,
# and accuracy as the reported metric.
adam_optimizer = keras.optimizers.Adam()
crossentropy_loss = keras.losses.categorical_crossentropy
model.compile(
    optimizer=adam_optimizer,
    loss=crossentropy_loss,
    metrics=['accuracy'],
)

In [15]:
# Train the model for 15 epochs using batches of size 128.
# The dev set is passed as validation data so loss/accuracy on held-out examples
# is reported after every epoch; validation data is never used to update weights.
# The returned History object is kept (instead of being echoed as the cell output)
# so the per-epoch train and dev metrics can be compared later via `history.history`.
history = model.fit(
    x_train,
    y_train,
    batch_size=128,
    epochs=15,
    validation_data=(x_dev, y_dev),
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.callbacks.History at 0x7f43d7fb6a10>

In [16]:
# Score the trained model on the held-out dev set to check for overfitting.
dev_scores = model.evaluate(x_dev, y_dev)
dev_loss, dev_metric = dev_scores
print('Accuracy = ', dev_metric)

Accuracy =  0.9854000210762024


Since the difference in accuracy between the training set and the dev set is less than 1%, overfitting is minimal.

In [17]:
# Predict on the test images and keep, for each one, the index of the
# highest-scoring class as the predicted digit.
results = model.predict(test).argmax(axis=1)
results

array([2, 0, 9, ..., 3, 9, 2])

In [18]:
# Convert the results into a DataFrame with the matching 1-based ImageId.
results_df = pd.DataFrame({
    'ImageId': np.arange(1, len(results) + 1),
    'Label': results,
})
results_df

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,9
4,5,3
...,...,...
27995,27996,9
27996,27997,7
27997,27998,3
27998,27999,9


In [19]:
# Write the predictions to a CSV file for submission (without the index column).
submission_file = 'submission.csv'
results_df.to_csv(submission_file, index=False)