In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.datasets import mnist
from keras import models
from keras import layers
from keras.utils import to_categorical
from sklearn.model_selection import  train_test_split

%matplotlib inline


Using TensorFlow backend.


In [2]:
# Load the labelled training CSV and the test CSV, in that order
train, test = map(pd.read_csv, ('datasets/train.csv', 'datasets/test.csv'))

In [3]:
# Separate the feature columns from the 'label' target column
x_train = train.drop(columns=['label'])
y_train = train['label']



In [4]:
# Normalise: divide every value by 255, then reshape each row into a
# 28x28 single-channel image, the input shape the network is built with
x_train = (x_train / 255.0).values.reshape(-1, 28, 28, 1)
test = (test / 255.0).values.reshape(-1, 28, 28, 1)


In [5]:
# Converting y to one-hot vectors (10 classes).
# Encode from the raw 'label' column instead of from y_train itself: the
# self-referential form `y_train = to_categorical(y_train, ...)` would, when the
# cell is re-run, encode the already one-hot matrix again and silently yield a
# 3-D array. Reading from `train` keeps the cell idempotent.
y_train = to_categorical(train['label'], num_classes=10)

In [6]:
# Hold out 10% of the samples for validation; the fixed random_state keeps the
# split the same from run to run
x_train, x_val, Y_train, Y_val = train_test_split(
    x_train,
    y_train,
    test_size=0.1,
    random_state=2,
)

In [7]:
# Small CNN: three 3x3 convolutions interleaved with max-pooling and dropout,
# followed by a dense classifier head with a 10-way softmax output.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    # positional (3, 3) means pool_size=3 AND strides=3, not a 3x3 tuple
    layers.MaxPooling2D(pool_size=3, strides=3),
    layers.Dropout(0.5),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=2, strides=2),
    layers.Dropout(0.5),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(10, activation='softmax'),
])


In [8]:
# Print each layer's output shape and parameter count
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 8, 8, 32)          0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 8, 8, 32)          0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 6, 6, 64)          18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 3, 3, 64)          0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 3, 3, 64)          0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 1, 1, 64)          36928     
__________

In [9]:
# Categorical cross-entropy pairs with the one-hot labels and softmax output;
# accuracy is tracked under the 'acc' key
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['acc'],
)

In [10]:
# Train for 20 epochs in mini-batches of 128, evaluating on the held-out
# validation split after every epoch; verbose=2 prints one line per epoch
history = model.fit(
    x_train,
    Y_train,
    batch_size=128,
    epochs=20,
    validation_data=(x_val, Y_val),
    verbose=2,
)

Train on 37800 samples, validate on 4200 samples
Epoch 1/20
 - 14s - loss: 0.8454 - acc: 0.7138 - val_loss: 0.1679 - val_acc: 0.9507
Epoch 2/20
 - 15s - loss: 0.2689 - acc: 0.9182 - val_loss: 0.1124 - val_acc: 0.9636
Epoch 3/20
 - 13s - loss: 0.1996 - acc: 0.9401 - val_loss: 0.0883 - val_acc: 0.9721
Epoch 4/20
 - 13s - loss: 0.1681 - acc: 0.9505 - val_loss: 0.0787 - val_acc: 0.9757
Epoch 5/20
 - 14s - loss: 0.1498 - acc: 0.9554 - val_loss: 0.0639 - val_acc: 0.9810
Epoch 6/20
 - 14s - loss: 0.1332 - acc: 0.9613 - val_loss: 0.0605 - val_acc: 0.9817
Epoch 7/20
 - 16s - loss: 0.1216 - acc: 0.9642 - val_loss: 0.0561 - val_acc: 0.9814
Epoch 8/20
 - 16s - loss: 0.1114 - acc: 0.9668 - val_loss: 0.0540 - val_acc: 0.9833
Epoch 9/20
 - 15s - loss: 0.1047 - acc: 0.9680 - val_loss: 0.0490 - val_acc: 0.9850
Epoch 10/20
 - 15s - loss: 0.0999 - acc: 0.9701 - val_loss: 0.0480 - val_acc: 0.9850
Epoch 11/20
 - 15s - loss: 0.0936 - acc: 0.9716 - val_loss: 0.0463 - val_acc: 0.9855
Epoch 12/20
 - 15s - loss

In [11]:
# Run the trained network on the preprocessed test images; the final layer is a
# 10-unit softmax, so each prediction row holds one score per class
results = model.predict(test)

In [12]:
# Pick the highest-scoring class for every row and wrap the class indices in a
# Series named 'label'
results = pd.Series(np.argmax(results, axis=1), name='label')

In [13]:
# Display the predicted labels (pandas abbreviates the long Series in the output)
results

0        2
1        0
2        9
3        0
4        3
5        7
6        0
7        3
8        0
9        3
10       5
11       7
12       4
13       0
14       4
15       3
16       3
17       1
18       9
19       0
20       9
21       1
22       1
23       5
24       7
25       4
26       2
27       7
28       4
29       7
        ..
27970    5
27971    0
27972    4
27973    8
27974    0
27975    3
27976    6
27977    0
27978    1
27979    9
27980    3
27981    1
27982    1
27983    0
27984    4
27985    5
27986    2
27987    2
27988    9
27989    6
27990    7
27991    6
27992    1
27993    9
27994    7
27995    9
27996    7
27997    3
27998    9
27999    2
Name: label, Length: 28000, dtype: int64