<a href="https://colab.research.google.com/github/bhavyaKumawat/tensorflow-datasets/blob/main/MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Import Libraries

In [None]:
import zipfile
import os
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import numpy as np
import itertools
import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam, RMSprop
from sklearn.metrics import confusion_matrix

## Download Kaggle Datasets

In [None]:
# Mount Google Drive at /content/drive (Colab only); the Kaggle API token
# is copied from the mounted drive in a later cell.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Create the Kaggle config directory; -p keeps the cell re-runnable
# (no error when the directory already exists).
! mkdir -p /root/.kaggle

In [None]:
# Copy the Kaggle API token from the mounted Drive. The absolute path
# matches the mount point used by drive.mount, so the cell does not
# depend on the current working directory.
! cp '/content/drive/MyDrive/Colab Notebooks/kaggle.json' '/root/.kaggle/kaggle.json'
# Restrict the token to the owner; the Kaggle CLI warns when the key is
# readable by other users.
! chmod 600 '/root/.kaggle/kaggle.json'

In [None]:
# Download the files of the "digit-recognizer" competition with the Kaggle CLI.
! kaggle competitions download -c digit-recognizer

In [None]:
# Unpack every downloaded archive into the current working directory.
files = ['train.csv.zip', 'test.csv.zip']
target_dir = os.getcwd()

for archive_name in files:
    with zipfile.ZipFile(archive_name, 'r') as archive:
        archive.extractall(target_dir)

In [None]:
# Load the extracted Kaggle CSV files and report their dimensions.
train, test = (pd.read_csv(csv_name) for csv_name in ("train.csv", "test.csv"))

print("train shape: ", train.shape)
print("test shape: ", test.shape)

In [None]:
# Show the container type returned by pd.read_csv.
type(train)

In [None]:
# Separate the 'label' column (targets) from the remaining columns (features).
y_train1 = train['label']
x_train1 = train.drop(columns='label')

In [None]:
# Convert to NumPy: labels stay a flat array, feature rows become 28x28 images.
y_train1 = y_train1.values
pixel_rows = x_train1.values
x_train1 = pixel_rows.reshape((-1, 28, 28))

In [None]:
# Sanity-check the shapes of the Kaggle arrays after reshaping.
print("x_train1 shape: ", x_train1.shape)
print("y_train1 shape: ", y_train1.shape)

## Download Keras Dataset

In [None]:
# Load the MNIST train/test splits bundled with Keras.
(x_train2, y_train2), (x_test, y_test) = keras.datasets.mnist.load_data()

In [None]:
# Sanity-check the shapes of the Keras MNIST arrays.
print("x_train2 shape: ", x_train2.shape)
print("y_train2 shape: ", y_train2.shape)
print("x_test shape: ", x_test.shape)
print("y_test shape: ", y_test.shape)

In [None]:
# Show the container type returned by load_data for the images.
type(x_train2)

## Concatenate both datasets

In [None]:
# Stack the Keras training data and the Kaggle training data along the
# sample axis, keeping images and labels in the same order.
x_train = np.concatenate([x_train2, x_train1], axis=0)
y_train = np.concatenate([y_train2, y_train1], axis=0)

In [None]:
# Sanity-check the shapes of the combined training arrays.
print("x_train shape: ", x_train.shape)
print("y_train shape: ", y_train.shape)

## Plot some images in dataset

In [None]:
# Show the first 25 training images in a 5x5 grid without ticks or grid lines.
plt.figure(figsize=(10, 10))
for cell in range(1, 26):
    plt.subplot(5, 5, cell)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    # Subplot positions are 1-based, the image index is 0-based.
    plt.imshow(x_train[cell - 1], cmap=plt.cm.binary)
plt.show()

Scale images to the [0, 1] range

In [None]:
# Divide the pixel values of both image sets by 255.0.
x_train, x_test = x_train / 255.0, x_test / 255.0

Reshape images to (28, 28, 1)

In [None]:
# Append a trailing axis of length 1 (the channel dimension) to both image sets.
x_train = x_train[..., np.newaxis]
x_test = x_test[..., np.newaxis]

In [None]:
# Confirm that the channel axis was added.
print("x_train new shape: ", x_train.shape)
print("x_test new shape: ", x_test.shape)

## Data augmentation

In [None]:
# Fraction of the training arrays reserved for validation.
validation_split = 0.1

# Random augmentation settings, collected in one place and passed on to
# the generator unchanged.
augmentation_options = dict(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    fill_mode='nearest',
    validation_split=validation_split,
)

datagen = ImageDataGenerator(**augmentation_options)

In [None]:
# Training batches: randomly augmented images drawn from the 'training'
# subset of the split configured on `datagen`.
train_generator = datagen.flow(
    x_train,
    y_train,
    batch_size=32,
    subset='training',
)

# Validation batches come from a separate generator that shares only the
# split fraction. Drawing them from `datagen` would apply the random
# rotations/shifts/zooms to the held-out images too, making val_accuracy
# (which early stopping monitors) noisy and pessimistic.
valid_datagen = ImageDataGenerator(validation_split=validation_split)
valid_generator = valid_datagen.flow(
    x_train,
    y_train,
    batch_size=8,
    subset='validation',
)

## Build the model

[How to choose CNN Architecture MNIST](https://www.kaggle.com/cdeotte/how-to-choose-cnn-architecture-mnist)

In [None]:
# Small CNN: two convolutional stages followed by a dense classifier.
# Only the first layer declares the input shape; later layers infer theirs,
# so the redundant input_shape on the second convolution has been dropped.
model = keras.Sequential([
    # Stage 1: two 3x3 convolutions with 32 filters each.
    keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(32, (3, 3), activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPooling2D((2, 2)),
    keras.layers.Dropout(0.4),

    # Stage 2: two 3x3 convolutions with 64 filters each.
    keras.layers.Conv2D(64, (3, 3), activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(64, (3, 3), activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPooling2D((2, 2)),
    keras.layers.Dropout(0.4),

    # Classifier head: 256 hidden units, then a 10-way softmax.
    keras.layers.Flatten(),
    keras.layers.Dense(256, activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(0.4),
    keras.layers.Dense(10, activation='softmax'),
])

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# Adam with a learning rate of 1e-3. Labels are integer class ids, hence
# the sparse categorical cross-entropy loss.
optimizer = Adam(learning_rate=1e-03)
model.compile(
    optimizer=optimizer,
    loss='sparse_categorical_crossentropy',
    # `metrics` is documented as a list; newer Keras releases reject a bare string.
    metrics=['accuracy'],
)

## Callbacks

In [None]:
# Stop training once val_accuracy has gone 15 epochs without improving.
# NOTE(review): restore_best_weights is not set, so the model presumably keeps
# the weights of the last epoch rather than the best one — confirm this is intended.
earlyStopping = keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=15)

## Train the model

In [None]:
# Train for up to 30 epochs with early stopping.
# Step counts are taken from the generators themselves: len(generator) is the
# integer number of batches per pass, whereas the earlier hand-computed
# expression produced a non-integer float.
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=30,
    validation_data=valid_generator,
    validation_steps=len(valid_generator),
    callbacks=[earlyStopping],
)

## Visualizations of the image as it passes through the convolutions.

In [None]:
# Build a model that maps an input image to the output of every layer of
# `model`, so all intermediate representations come from a single call.
feature_extractor = keras.models.Model(
    inputs=model.input,
    outputs=[layer.output for layer in model.layers],
)

# Take one image from the training set and add a leading batch axis.
img = x_train[2052]
x = np.expand_dims(img, axis=0)

# One array per layer, in the same order as model.layers.
successive_feature_maps = feature_extractor.predict(x)

# Layer names are used as plot titles.
layer_names = [layer.name for layer in model.layers]

for layer_name, feature_map in zip(layer_names, successive_feature_maps):
    # Only 4-D outputs, shaped (1, size, size, n_features), can be tiled as
    # images; the flattened and fully-connected layers are skipped.
    if feature_map.ndim != 4:
        continue

    n_features = feature_map.shape[-1]
    size = feature_map.shape[1]

    # All channels of this layer are tiled side by side in one wide image.
    display_grid = np.zeros((size, size * n_features))
    for i in range(n_features):
        # Standardise the channel, then map it to a displayable 0..255 range.
        # The arithmetic is out-of-place so the predicted arrays stay intact.
        channel = feature_map[0, :, :, i]
        channel = channel - channel.mean()
        spread = channel.std()
        # A constant channel (e.g. all zeros after ReLU) has zero spread;
        # dividing would yield NaNs, so it is left centred and renders as
        # uniform mid-grey.
        if spread > 0:
            channel = channel / spread
        channel = np.clip(channel * 64 + 128, 0, 255).astype('uint8')
        display_grid[:, i * size:(i + 1) * size] = channel

    # Keep the figure width fixed and scale its height with the channel count.
    scale = 20. / n_features
    plt.figure(figsize=(scale * n_features, scale))
    plt.title(layer_name)
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    plt.imshow(display_grid, aspect='auto', cmap='viridis')

## Wrong Predictions

In [None]:
# Predict on the training images and collect the misclassified samples
# together with their positions in x_train.
predictions = model.predict(x_train)
predicted_classes = predictions.argmax(axis=1)

is_wrong = predicted_classes != y_train
wrong_predictions = x_train[is_wrong]
indices = np.nonzero(is_wrong)[0]

In [None]:
# Report how many training images were misclassified.
number = len(wrong_predictions)
print("Number of wrong predictions : ", number)

In [None]:
# Show up to 100 misclassified images in a 10x10 grid, each titled with its
# index in x_train. The count is capped at the number of wrong predictions
# so the loop cannot index past the end when there are fewer than 100.
shown = min(100, len(wrong_predictions))

plt.figure(figsize=(10, 15))
for i in range(shown):
    plt.subplot(10, 10, i + 1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.title(indices[i])
    plt.imshow(wrong_predictions[i].reshape(28, 28), cmap=plt.cm.binary)
plt.show()

## Confusion Matrix

In [None]:
def plot_confusion_matrix(cm, classes):
    """Draw a confusion matrix as an annotated heat map on the current figure.

    Args:
        cm: 2-D array of counts. Rows are plotted along the "True label"
            axis and columns along the "Predicted label" axis.
        classes: Sequence of class names used as tick labels on both axes.
    """
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title('Confusion matrix')
    plt.colorbar()

    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Cells darker than half the maximum get white text so the count stays readable.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run the layout pass last so the axis labels are included in it.
    plt.tight_layout()

In [None]:
# Confusion matrix of the training-set predictions, plotted for the ten digit classes.
confusion_mtx = confusion_matrix(y_train, predicted_classes)
plot_confusion_matrix(confusion_mtx, classes = range(10))

## Training vs Validation accuracy

In [None]:
# Per-epoch metrics recorded by model.fit.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))

# Figure 1: training vs validation accuracy.
plt.plot(epochs, acc, 'r', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.legend(loc=0)

# Figure 2: training vs validation loss. The loss curves were read above
# but never drawn, which left this second figure empty.
plt.figure()
plt.plot(epochs, loss, 'r', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend(loc=0)

plt.show()

In [None]:
# Evaluate the trained model on the Keras MNIST test split.
# NOTE(review): the Kaggle digit-recognizer data is presumably derived from
# MNIST, so x_test may overlap with the Kaggle rows used for training —
# confirm before treating this as a held-out score.
model.evaluate(x_test, y_test)

## Submission

In [None]:
# Turn the Kaggle test frame into image tensors of shape (n, 28, 28, 1)
# and divide by 255.0, mirroring the preprocessing of the training images.
test_pixels = test.values
test = test_pixels.reshape((-1, 28, 28, 1)) / 255.0

In [None]:
# The predicted label is the index of the largest class score in each row.
class_scores = model.predict(test)
predictions = class_scores.argmax(axis=1)

In [None]:
# Assemble the submission table: 1-based image ids paired with predicted labels.
image_ids = pd.Series(range(1, len(predictions) + 1))
data = {'ImageId': image_ids, 'Label': predictions}

submission = pd.DataFrame(data)

In [None]:
# Write the submission table to submission.csv without the DataFrame index column.
submission.to_csv('submission.csv'  , index=False)

In [None]:
# Submit submission.csv to the digit-recognizer competition.
# NOTE(review): the -m message is hard-coded and is not computed by this
# notebook — update it to the score actually measured for this run.
! kaggle competitions submit -c digit-recognizer -f submission.csv -m 'accuracy 99.5%'