# 0. Import libraries

In [None]:
# common
import os
import numpy as np
import pandas as pd
import tensorflow as tf

# data preprocessing
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

# data visualization
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# model building
from keras.models import Sequential
from keras.layers import Input
from keras.layers import Conv2D
from keras.layers import MaxPool2D
from keras.layers import Flatten
from keras.layers import Dense

# callbacks
from tensorflow.keras.callbacks import EarlyStopping

# 1. Data Preprocessing

In [None]:
# list every file found under the Kaggle input directory
# (os.walk yields the current directory path, its sub-directory names, and its file names)
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

In [None]:
# read the three competition CSV files: training set, test set, sample submission
train_data, test_data, submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/digit-recognizer/train.csv',
        '/kaggle/input/digit-recognizer/test.csv',
        '/kaggle/input/digit-recognizer/sample_submission.csv',
    )
)

In [None]:
# preview the first five rows of the training data
train_data.head(n=5)

In [None]:
# count missing values per column in the training data
train_data.isna().sum()

In [None]:
# count missing values per column in the test data
test_data.isna().sum()

In [None]:
# print a concise summary of the training DataFrame
train_data.info()

In [None]:
# separate the labels from the pixel columns of the training data
y_train = train_data['label']
x_train = train_data.drop(columns=['label'])

In [None]:
# preview the first five rows of x_train (pixel columns only)
x_train.head(n=5)

In [None]:
# preview the first five entries of y_train (labels)
y_train.head(n=5)

In [None]:
# class distribution: number of training samples per label value
y_train.value_counts()

In [None]:
# plot how many training samples each label has
plt.figure(figsize=(10, 6))
# the labels are integers, so use discrete bins (width 1, centred on each value);
# the default automatic binning would place bin edges between the label values
sns.histplot(y_train, discrete=True)
plt.title('Class Distribution')
plt.show()

In [None]:
# scale the pixel values by 1/255 (assumes 8-bit grayscale intensities)
x_train, test = x_train / 255.0, test_data / 255.0

In [None]:
# reshape each flat pixel row into a 28x28 single-channel image
x_train = x_train.values.reshape(-1, 28, 28, 1)
# reshape the normalized `test` frame; reshaping the raw `test_data` here
# would silently discard the /255 scaling applied in the previous cell
test = test.values.reshape(-1, 28, 28, 1)

In [None]:
# shapes of the reshaped x_train and test arrays
x_train.shape, test.shape

In [None]:
# one-hot encode the labels into 10 classes
# the labels are passed positionally: the first parameter is named `y` in older
# Keras releases and `x` in Keras 3, so the keyword form `y=` fails on Keras 3
y_train = to_categorical(y_train, num_classes=10)

In [None]:
# shape of the one-hot encoded y_train
y_train.shape

## 1.2 Split the data

In [None]:
# hold out 10% of the training data as a validation set (shuffled, fixed seed)
X_train, X_val, y_train, y_val = train_test_split(
    x_train,
    y_train,
    test_size=0.1,
    shuffle=True,
    random_state=42,
)

In [None]:
# shapes of the training and validation images and labels after the split
X_train.shape, y_train.shape,X_val.shape, y_val.shape

# 2. View images

In [None]:
# show the first 25 training images in a 5x5 grid
plt.figure(figsize=(16, 16))
for cell in range(1, 26):
    # subplot positions are 1-based, image indices are 0-based
    plt.subplot(5, 5, cell)
    plt.imshow(X_train[cell - 1])

# 3. Model training [Tiny VGG]

In [None]:
# set random seed
tf.random.set_seed(42)

# tiny vgg model: two blocks of (conv, conv, max-pool), then a softmax classifier
# The input shape is declared with an explicit Input layer rather than via
# `input_shape=` on the first Conv2D, which newer Keras releases warn against
# for Sequential models.
tiny_vgg_model = Sequential([
    Input(shape=(28, 28, 1)),
    Conv2D(filters=10, kernel_size=(3, 3), activation='relu'),
    Conv2D(10, 3, activation='relu'),
    MaxPool2D(pool_size=2, padding='valid'),
    Conv2D(10, 3, activation='relu'),
    Conv2D(10, 3, activation='relu'),
    MaxPool2D(2),
    Flatten(),
    Dense(10, activation='softmax')  # one output unit per class
])

In [None]:
# print the layer-by-layer summary of the tiny-VGG model
tiny_vgg_model.summary()

In [None]:
# configure training: categorical cross-entropy loss (labels are one-hot),
# Adam optimizer, accuracy reported as a metric
tiny_vgg_model.compile(
    optimizer="Adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# stop once val_loss has not improved for 10 epochs, then roll back to the best weights
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

In [None]:
# fit the model
# steps_per_epoch is left for Keras to infer from the array length and batch_size.
# Forcing it to len(X_train) with batch_size=8 would ask for 8x more batches per
# epoch than one pass over the data provides, so the input runs out early and the
# EarlyStopping patience (counted in epochs) stops meaning "passes over the data".
tiny_vgg_history = tiny_vgg_model.fit(X_train, y_train,
                                      epochs=50, batch_size=8,
                                      validation_data=(X_val, y_val),
                                      callbacks=[early_stop])

# 4. Visualize the performance

In [None]:
# plot every metric recorded during training, one line per history key, against the epoch index
history_df = pd.DataFrame(tiny_vgg_history.history)
history_df.plot()
plt.xlabel('Epoch')
plt.ylabel('Accuracy/Loss')
plt.title('Accuracy & Loss')
plt.show()

# 5. Evaluate model on validation set

In [None]:
# evaluate the trained model on the validation set (returns the loss and the compiled metrics)
tiny_vgg_model.evaluate(X_val, y_val)

In [None]:
# plot confusion matrix
# predicted class probabilities for the validation images
Y_pred = tiny_vgg_model.predict(X_val)
# most probable class per sample
Y_pred_classes = np.argmax(Y_pred, axis=1)
# recover integer labels from the one-hot encoded validation labels
Y_true = np.argmax(y_val, axis=1)
confusion_mtx = confusion_matrix(Y_true, Y_pred_classes)
f, ax = plt.subplots(figsize=(10, 7))
# the cells are integer counts, so annotate them as integers ('d') instead of '.1f'
sns.heatmap(confusion_mtx, annot=True, linewidths=0.01, fmt='d', ax=ax)
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")
plt.show()

# 6. Prediction on test data

In [None]:
# shape of the raw test DataFrame (rows, columns)
test_data.shape

In [None]:
# turn the raw test rows into 28x28x1 images and scale the pixel values by 1/255
test_images = test_data.to_numpy().reshape(-1, 28, 28, 1)
x_test = test_images / 255.0

In [None]:
# predict class probabilities for every test image
results = tiny_vgg_model.predict(x_test)
# keep the most probable class per image
# NOTE: `results` is deliberately a 1-tuple wrapping the label array;
# the submission cell reads the labels back as `results[0]`
results = (np.argmax(results, axis=1),)
results

# 7. Submission

In [None]:
# build the submission table: 1-based ImageId and the predicted Label
# `results` is a 1-tuple holding the label array, so unwrap it first
predicted_labels = results[0]
# derive the id range from the number of predictions instead of hard-coding 28000 rows
submission = pd.DataFrame({'ImageId': range(1, len(predicted_labels) + 1),
                           'Label': predicted_labels})
submission.to_csv('submission.csv', index=False)
# read the file back to check what was written
pd.read_csv('submission.csv')

 **Thank you, and please upvote!** 👍