# Digit Recognizer

###### CNN on the classic dataset of handwritten digit images

### Importing important libraries

In [None]:
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

#import os
#print(os.listdir("../input"))

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

import tensorflow as tf

### Reading Datasets

##### Train Dataset

In [None]:
# Read the training CSV into a DataFrame and preview its first rows.
# Alternative location of the same file (e.g. when the data lives under ../input):
#train = pd.read_csv('../input/train.csv')
train = pd.read_csv('../train.csv')
train.head()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for each numeric column.
train.describe()

##### Test Dataset

In [None]:
# Read the test CSV into a DataFrame and preview its first rows.
# Alternative location of the same file (e.g. when the data lives under ../input):
#test = pd.read_csv("../input/test.csv")
test = pd.read_csv("../test.csv")
test.head()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for each numeric column.
test.describe()

### Exploratory Data Analysis

#### Checking for NULL values

In [None]:
# Total number of missing values across the pixel columns of the training set.
# Nulls are counted per column and the `label` entry is dropped before summing;
# this avoids melt(), which would materialise a long-format copy with one row
# per pixel just to count NaNs.
train.isnull().sum().drop("label").sum()

In [None]:
# Total number of missing values in the test set.
# Nulls are counted per column and then summed; this avoids melt(), which would
# materialise a long-format copy with one row per cell just to count NaNs.
test.isnull().sum().sum()

#### Count of each label

In [None]:
# Number of training rows per label value, ordered by label rather than by frequency.
train['label'].value_counts().sort_index()

In [None]:
# Bar chart of how many training rows carry each label.
plt.figure(figsize=(8, 4))
sns.set_style("whitegrid")
# countplot returns the Axes it drew on, so the axis labels can be set directly on it.
sns.countplot(data=train, x="label").set(xlabel="Label", ylabel="Count")
plt.show()

#### Dividing the training dataset into X and y

In [None]:
# Separate the pixel columns (features) from the target column.
X = train.drop(columns='label')
y = train['label']

#### Normalize the pixel data, i.e. converting values from 0 - 255 to 0 - 1

In [None]:
# Scale every pixel value by 1/255; train and test must receive the same
# scaling so the network sees identically distributed inputs at predict time.
X = X.div(255)
test = test.div(255)

#### Converting labels to numpy array

In [None]:
# Convert the labels to a plain NumPy array so they can be indexed with the
# integer position arrays produced by the K-fold splitter during training.
y = np.array(y)

#### Reshaping images to 28px x 28px x 1 channel

In [None]:
# Turn each flat pixel row into a 28 x 28 image with a single channel axis;
# -1 lets NumPy infer the number of images. Both frames get the same layout.
X, test = (frame.to_numpy().reshape(-1, 28, 28, 1) for frame in (X, test))

In [None]:
# Display the first training image (channel 0 of sample 0) as a sanity check.
plt.imshow(X[0, :, :, 0])

### Modelling CNN

#### Importing Stratified K-Fold

In [None]:
from sklearn.model_selection import StratifiedKFold

### Tensorflow Model

> ##### Conv2D --> MaxPool2D --> Conv2D --> MaxPool2D --> Conv2D --> MaxPool2D --> Conv2D --> MaxPool2D --> Flatten --> Dense Softmax

In [None]:
# Tensorflow Keras CNN Model
# Four Conv2D(3x3, "same", ReLU) + MaxPool2D(2x2, stride 2) stages with the
# filter count doubling each stage (32 -> 64 -> 128 -> 256), followed by a
# Flatten and a 10-way softmax classifier. With 28x28 inputs the pooling stages
# shrink the spatial size 28 -> 14 -> 7 -> 3 -> 1.
model = tf.keras.models.Sequential([
    # Stage 1: the only layer that needs the input shape (taken from X itself).
    tf.keras.layers.Conv2D(32, (3, 3), padding="same", activation="relu",
                           input_shape=X.shape[1:]),
    tf.keras.layers.MaxPool2D(pool_size=2, strides=2),

    # Stage 2
    tf.keras.layers.Conv2D(64, (3, 3), padding="same", activation="relu"),
    tf.keras.layers.MaxPool2D(pool_size=2, strides=2),

    # Stage 3
    tf.keras.layers.Conv2D(128, (3, 3), padding="same", activation="relu"),
    tf.keras.layers.MaxPool2D(pool_size=2, strides=2),

    # Stage 4
    tf.keras.layers.Conv2D(256, (3, 3), padding="same", activation="relu"),
    tf.keras.layers.MaxPool2D(pool_size=2, strides=2),

    # Classifier head: one softmax output per class.
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(10, activation='softmax'),
])


#### Optimizer and loss function

In [None]:
# Configure training: Adam optimizer, accuracy as the reported metric.
# The "sparse" cross-entropy variant is used because y holds integer class
# labels rather than one-hot vectors.
model.compile(optimizer='adam', 
              loss='sparse_categorical_crossentropy', 
              metrics=['accuracy'])

In [None]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

#### Training with Stratified K-Fold

In [None]:
# Stratified K-Fold: each of the 12 folds keeps the label proportions of the
# full training set; the fixed random_state makes the shuffle reproducible.
k_fold = StratifiedKFold(n_splits=12, random_state=12, shuffle=True)

# Train for 5 epochs on each fold's training part and report loss/accuracy on
# its held-out part after every epoch (previously the held-out indices were
# computed but never used, so training gave no validation signal at all).
#
# NOTE: the same `model` instance is fitted in every iteration, so its weights
# carry over from fold to fold. From the second fold onwards the held-out
# samples have already been trained on in earlier folds, which makes the
# reported validation scores optimistic; only the first fold is truly unseen.
for k_train_index, k_test_index in k_fold.split(X, y):
    model.fit(
        X[k_train_index],
        y[k_train_index],
        epochs=5,
        validation_data=(X[k_test_index], y[k_test_index]),
    )

In [None]:
# Score the model on the full (X, y) set. These are the same samples the model
# was fitted on across the folds, so despite the `val_` names this is a
# training-set loss/accuracy, not a held-out validation score.
val_loss, val_acc = model.evaluate(X, y)
val_acc

### Predicting on the test set and building the submission dataframe

In [None]:
# Run the trained network on the test images; each row of test_pred holds the
# 10 softmax scores produced by the final Dense layer.
test_pred = model.predict(test)

In [None]:
# Assemble the submission table: 1-based image ids paired with the predicted
# class, i.e. the index of the highest softmax score in each prediction row.
submission = pd.DataFrame({
    'ImageId': range(1, len(test) + 1),
    'Label': test_pred.argmax(axis=1),
})

In [None]:
# Preview the first rows of the submission table.
submission.head()

In [None]:
# (rows, columns) of the submission table; rows should equal the number of test images.
submission.shape

In [None]:
# Write the submission to submission.csv; index=False keeps the DataFrame's
# row index out of the file so only the ImageId and Label columns are written.
submission.to_csv("submission.csv", index=False)