Importing important function from libraries

In [8]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
import tensorflow as tf
import numpy as np
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers import Dense
from keras.utils.np_utils import to_categorical
from keras.utils import np_utils
import matplotlib.pyplot as plt
import pandas as pd
import math

Training Data with 42000 images of 28*28 = 784 pixels each

In [6]:
train = pd.read_csv("/content/drive/MyDrive/Digit_Recognizer/train.csv")
print(train.shape)

(42000, 785)


Test Data with 28000 images of 28*28=784 pixels each

In [7]:
test = pd.read_csv("/content/drive/MyDrive/Digit_Recognizer/test.csv")
print(test.shape)

(28000, 784)


x_train is the input training data

y_train is the label/output of each input training image

x_test is the input test data

In [8]:
x_train = train.iloc[:, 1:785].values
y_train = train.iloc[:, 0].values
x_test = test.iloc[:, 0:784].values
print(x_train.shape)
print(y_train.shape)

(42000, 784)
(42000,)


Dividing the images by 255 to normalize them

In [9]:
seed = 5
np.random.seed(seed)
x_train = x_train/255.0
x_test = x_test/255.0
print(x_train.shape)

(42000, 784)


We want images in a 28*28 size array

In [10]:
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)
print(x_train.shape)

(42000, 28, 28, 1)


Defining some constants

In [11]:
num_classes = 10          #We can have 10 different ouptuts
batch_size = 64           
epochs = 40               
input_shape = (28,28,1)   #Size of input image

Categorizing all outputs in 10 different possibilities

In [12]:
y_train = to_categorical(y_train, num_classes)
print(y_train.shape)

(42000, 10)


In [13]:
from sklearn.model_selection import train_test_split
x_train, x_validation, y_train, y_validation = train_test_split(x_train, y_train, test_size = 0.2, random_state=seed)

**MODEL**

In [14]:
model = Sequential()
model.add(Conv2D(32, kernel_size = (3,3), input_shape= input_shape, activation = 'relu'))
model.add(MaxPooling2D(pool_size = (2,2)))
model.add(Dropout(0.2))
model.add(Conv2D(32, (3,3), activation = 'relu'))
model.add(MaxPooling2D())
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(512,activation = 'relu'))
model.add(Dense(512, activation = 'relu'))
model.add(Dense(num_classes, activation = 'sigmoid'))

In [15]:
model.compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

In [16]:
model.summary()


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 13, 13, 32)        0         
_________________________________________________________________
dropout (Dropout)            (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 11, 11, 32)        9248      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 5, 5, 32)          0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 5, 5, 32)          0         
_________________________________________________________________
flatten (Flatten)            (None, 800)               0

In [17]:
model.fit(x_train, y_train, epochs = epochs, verbose = 1)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<keras.callbacks.History at 0x7f3b300a32d0>

In [18]:
loss , accuracy = model.evaluate(x_validation, y_validation, verbose = 0)
print("Loss : ",loss, "Accuracy : ", accuracy)

Loss :  0.0714588537812233 Accuracy :  0.9885714054107666


So, we got an accuracy of 98.88 percent and a loss of 0.7 % which is alright

In [19]:
predicted_classes = model.predict_classes(x_test)



In [20]:
submissions=pd.DataFrame({"ImageId": list(range(1,len(predicted_classes)+1)), "Label": predicted_classes})

In [21]:
submissions.to_csv("submission.csv", index = False, header = True)

In [22]:
from google.colab import files
files.download('submission.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>