<h1>Digit Recognizer</h1>

[https://www.kaggle.com/c/digit-recognizer](https://www.kaggle.com/c/digit-recognizer)

Author: Vaasudevan Srinivasan [(Portfolio)](https://vaasudevans.github.io) <br>
Created on: July 09, 2021 <br>

This notebook demonstrates how to use augmentation as a neural network layer. <br>
The augmentation then runs on the GPU (if available), which speeds up training considerably.

In [None]:
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Flatten, Input
from tensorflow.keras.layers.experimental import preprocessing
import matplotlib.pyplot as plt
import tensorflow as tf
import seaborn as sns
import pandas as pd
import numpy as np

# Apply seaborn's default theme to the plots drawn later in the notebook
sns.set_theme()

# Data Preparation

In [None]:
# Read the labelled training CSV and preview its first rows
train_df = pd.read_csv(
    filepath_or_buffer='/kaggle/input/digit-recognizer/train.csv',
)
train_df.head(n=5)

In [None]:
# Class-balance check: one bar per label value.
# `discrete=True` centres unit-width bins on the integer labels, so each bar
# lines up with its tick. No KDE curve is drawn because the labels are
# categorical class ids and a continuous density between them has no meaning.
sns.histplot(train_df, x='label', discrete=True);

In [None]:
# Split the frame into targets (the 'label' column) and features (all other columns)
y_train = train_df['label'].to_numpy()
x_train = train_df.drop(columns='label').to_numpy()

# Turn every flat row into a 28x28 image with a trailing single-channel axis,
# i.e. an array of shape (n_samples, 28, 28, 1)
x_train = x_train.reshape((len(train_df), 28, 28))[..., np.newaxis]

print(x_train.shape, y_train.shape)

In [None]:
# Visualization: show the first rows * cols training images in a grid
cols, rows = 10, 10
fig, axes = plt.subplots(rows, cols, figsize=(6, 6))
for i, ax in enumerate(axes.flat):
    # axes.flat walks the grid row by row, so image i lands in cell i
    ax.imshow(x_train[i])
    ax.axis('off')
fig.tight_layout();

# CNN Model

In [None]:
# Seed for the random augmentation layers.
# The notebook seeds TensorFlow globally further down, but these layers are
# created before that call, and without an explicit `seed` they may initialise
# their own random generator independently of the global seed (depends on the
# TF version). Explicit seeds make the augmentation sequence repeatable.
AUGMENTATION_SEED = 1996

# Augmentation Layer
# Each layer gets a different seed so that the rotation and zoom draws are not
# generated from identical random streams.
data_augmentation = tf.keras.models.Sequential([
    preprocessing.RandomRotation(0.1, seed=AUGMENTATION_SEED),
    preprocessing.RandomZoom(0.1, seed=AUGMENTATION_SEED + 1),
], name='augmentation')

# CNN Model
# Two conv + max-pool stages followed by Flatten; the classification head is
# attached where the full model is assembled.
cnn_model = tf.keras.models.Sequential([
    Conv2D(filters=24, kernel_size=3, activation='relu'),
    MaxPool2D(pool_size=2),
    Conv2D(filters=36, kernel_size=3, activation='relu'),
    MaxPool2D(pool_size=2),
    Flatten(),
], name='cnn_model')

In [None]:
# To maintain same training loss and accuracy
tf.random.set_seed(1996)

# Construct the model with Augmentation
inputs = Input(shape=(28, 28, 1), name='input')

# Scale pixel intensities inside the model so that fit() and predict() always
# see the same normalisation, without any change to the data-preparation cells.
# NOTE(review): assumes the CSV pixel columns are 8-bit intensities in 0-255,
# which maps them to [0, 1] - confirm against the dataset description.
x = preprocessing.Rescaling(1.0 / 255, name='rescale')(inputs)

# Random rotation/zoom, then the convolutional feature extractor
x = data_augmentation(x)
x = cnn_model(x)

# Classification head: one hidden layer, then a 10-way softmax
x = Dense(128, activation='relu', name='hidden')(x)
outputs = Dense(10, activation='softmax', name='output')(x)

model = tf.keras.Model(inputs, outputs)
model.summary()

In [None]:
# Integer class labels pair with the sparse categorical cross-entropy loss;
# accuracy is tracked alongside the loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 10 epochs, holding out 5% of the training data for validation.
# The returned History object is kept in `h` for plotting.
h = model.fit(
    x=x_train,
    y=y_train,
    epochs=10,
    validation_split=0.05,
)

In [None]:
# One line per metric recorded in the training history, indexed by epoch
pd.DataFrame(data=h.history).plot.line();

# Prediction and Submission

In [None]:
# Read the test CSV and preview its first rows
test_df = pd.read_csv(
    filepath_or_buffer='/kaggle/input/digit-recognizer/test.csv',
)
test_df.head(n=5)

In [None]:
# Turn every flat test row into a 28x28 image with a trailing single-channel axis
x_test = test_df.to_numpy().reshape((len(test_df), 28, 28))[..., np.newaxis]
print(x_test.shape)

# Predicted label = index of the largest of the 10 softmax outputs;
# ImageId is the 1-based row position in the test frame.
submission = pd.DataFrame({
    'ImageId': test_df.index + 1,
    'Label': model.predict(x_test).argmax(axis=1),
})

submission.head()

In [None]:
# Write the submission file without the DataFrame index column
submission.to_csv(path_or_buf='submission.csv', index=False)