In [1]:


import numpy as np 
import pandas as pd



import os
# Print the full path of every file found under the Kaggle input directory.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))


#### Importing Libraries

In [2]:

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
%matplotlib inline

# Seed NumPy's global random number generator so NumPy-driven randomness is repeatable.
# NOTE(review): this call only seeds NumPy; TensorFlow/Keras keep their own
# generators — confirm whether tf.random.set_seed is also wanted.
np.random.seed(2)
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import itertools

from keras.utils.np_utils import to_categorical 
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D

from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau
import tensorflow as tf


# Apply seaborn's 'white' style, 'notebook' context and 'deep' palette to subsequent plots.
sns.set(style='white', context='notebook', palette='deep')

In [25]:
from keras.models import load_model
import pickle
from joblib import Parallel, delayed
import joblib

Importing Training data and Testing data from Kaggle

In [3]:
# Read the Digit Recognizer training and test CSV files into DataFrames.
train_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/digit-recognizer/train.csv",
        "/kaggle/input/digit-recognizer/test.csv",
    )
)

Printing train data

In [4]:
# Preview the first five rows of the training frame.
train_data.head(n=5)

#### Splitting the data
To train the model we need both features and target values, so we extract them from the training data we imported. Looking closely at the training data, it has 785 columns: the first column is the target value (the label) and the remaining 784 columns are the features for our model. We now create two variables, `x_train` and `y_train`, which hold the features and the target values of our training set respectively.

In [5]:
# Pixel columns become the features; the "label" column becomes the target.
x_train = train_data.drop(columns=["label"])
y_train = train_data["label"]

# The combined frame is no longer needed once features and labels are separated.
del train_data

# Show how many samples there are per digit.
y_train.value_counts()

We all know how the ReLU and sigmoid activations work; here is a diagram of the ReLU and sigmoid activation functions:
![data](https://miro.medium.com/max/1400/1*XxxiA0jJvPrHEJHD4z893g.png)

Looking closely, the slope of the sigmoid curve decreases as the magnitude of the input increases, so it is very difficult to train on data that has large values: when the slope is small, learning is slow and it takes more steps to reach the optimum. Even with the
ReLU curve, large input values produce very large updates, which also causes problems for us: the optimizer can jump too far, which leads to bouncing back and forth along its path or missing the optimum altogether. To avoid all of this we need to keep the input values in a small range, so we normalise all the values so that they lie between 0 and 1.

In [6]:
# Divide every value by 255.0 (per the notebook's prose, this brings the inputs into the 0-1 range).
x_train = x_train.div(255.0)
test_data = test_data.div(255.0)

In [7]:
# Turn each row into a 28x28 single-channel image; -1 lets NumPy infer the sample count.
image_shape = (-1, 28, 28, 1)
x_train = x_train.to_numpy().reshape(image_shape)
test_data = test_data.to_numpy().reshape(image_shape)

In [8]:
# Confirm the reshaped training array's dimensions.
np.shape(x_train)

In a convolutional neural network classifier the output of the model is an array, so we need to convert the labels into arrays as well (one-hot encoding). For example, if our label is 4 we change it into
[0,0,0,0,1,0,0,0,0,0], which also represents the value 4. The model outputs a probability for each entry of this array: for example, if the input digit is 6, the output of our model may look like [0.0,0.0,0.0,0.0,0.0,0.05,0.6,0.0,0.2,0.15], and we take the position (index) with the highest probability as our answer.

In [9]:
# One-hot encode the digit labels into vectors of length 10.
y_train = to_categorical(y_train, 10)

#### Splitting the data
We should not use the same data for training and validation, so we need to split the data into a training set and a validation set. In our model we split the total data into 0.9 and 0.1 portions (90% for training, 10% for validation), so that we get more examples for training the model.

In [10]:
# Hold out 10% of the samples for validation; the fixed seed keeps the split repeatable.
random_seed = 2
x_train, x_val, y_train, y_val = train_test_split(
    x_train,
    y_train,
    test_size=0.1,
    random_state=random_seed,
)

In [11]:
# Display the first training image (its only channel).
g = plt.imshow(x_train[0, :, :, 0])

In [12]:
# CNN layout: two (Conv-Conv-Pool-Dropout) stages followed by a dense classifier head.
model = Sequential([
    # Stage 1: two 5x5 convolutions with 32 filters each.
    Conv2D(filters=32, kernel_size=(5, 5), padding='Same',
           activation='relu', input_shape=(28, 28, 1)),
    Conv2D(filters=32, kernel_size=(5, 5), padding='Same',
           activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Stage 2: two 3x3 convolutions with 64 filters each.
    Conv2D(filters=64, kernel_size=(3, 3), padding='Same',
           activation='relu'),
    Conv2D(filters=64, kernel_size=(3, 3), padding='Same',
           activation='relu'),
    MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
    Dropout(0.25),

    # Classifier head: flatten, one hidden dense layer, softmax over the 10 classes.
    Flatten(),
    Dense(256, activation="relu"),
    Dropout(0.5),
    Dense(10, activation="softmax"),
])

In [14]:

# RMSprop with an explicit learning rate, rho and epsilon.
# The former `decay=0.0` argument is omitted: 0.0 meant "no decay" anyway, and
# newer Keras optimizers reject the `decay` keyword with an error.
optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001, rho=0.9, epsilon=1e-08)

In [15]:
# Configure training: cross-entropy over the one-hot labels, tracked with accuracy.
model.compile(
    loss="categorical_crossentropy",
    optimizer=optimizer,
    metrics=["accuracy"],
)

In [16]:
# Halve the learning rate when validation accuracy has not improved for 3 epochs,
# never going below 1e-5.
# The monitored key must match the metric name used in model.compile:
# metrics=["accuracy"] is logged as "val_accuracy" for the validation set, so the
# previous 'val_acc' key was never found and the callback never triggered.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_accuracy',
                                            patience=3,
                                            verbose=1,
                                            factor=0.5,
                                            min_lr=0.00001)

In [17]:
# Training schedule: number of epochs and number of samples per batch.
epochs, batch_size = 40, 86

In [18]:
# Augmentation settings: small random rotations, zooms and shifts; no normalisation
# statistics, whitening or flips.
augmentation_options = dict(
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=False,
    vertical_flip=False,
)
datagen = ImageDataGenerator(**augmentation_options)

# Let the generator compute any data-dependent statistics from the training images.
datagen.fit(x_train)


In [19]:
# Train on augmented batches and evaluate on the untouched validation split each epoch.
steps_per_epoch = x_train.shape[0] // batch_size
train_batches = datagen.flow(x_train, y_train, batch_size=batch_size)
history = model.fit(
    train_batches,
    epochs=epochs,
    steps_per_epoch=steps_per_epoch,
    validation_data=(x_val, y_val),
    callbacks=[learning_rate_reduction],
    verbose=2,
)

In [27]:
# Class probabilities for every validation image.
y_pred = model.predict(x_val)

# Collapse probability vectors and one-hot labels to class indices.
y_pred_classes = y_pred.argmax(axis=1)
y_true = y_val.argmax(axis=1)

# Confusion matrix of true classes against predicted classes.
confusion_mtx = confusion_matrix(y_true, y_pred_classes)

In [28]:
# Boolean mask marking the validation samples whose predicted class is wrong.
errors = y_pred_classes != y_true

# Keep only the misclassified entries of each validation array.
Y_pred_classes_errors, Y_pred_errors, Y_true_errors, X_val_errors = (
    values[errors] for values in (y_pred_classes, y_pred, y_true, x_val)
)

def display_errors(errors_index, img_errors, pred_errors, obs_errors):
    """Show up to six images in a 2x3 grid, titled with predicted and true labels.

    Parameters
    ----------
    errors_index : sequence of int
        Positions (into the three arrays below) of the samples to display.
        Only the first six are shown; if fewer are given, the remaining
        panels are left blank instead of raising IndexError.
    img_errors : array-like
        Images; each selected entry is reshaped to 28x28 for display.
    pred_errors : array-like
        Predicted label for each image.
    obs_errors : array-like
        True (observed) label for each image.
    """
    nrows = 2
    ncols = 3
    fig, ax = plt.subplots(nrows, ncols, sharex=True, sharey=True)
    # ax.flat walks the grid row by row, the same order as nested row/col loops.
    for n, panel in enumerate(ax.flat):
        if n >= len(errors_index):
            # Fewer samples than panels: hide the unused panel.
            panel.axis("off")
            continue
        error = errors_index[n]
        panel.imshow((img_errors[error]).reshape((28, 28)))
        panel.set_title("Predicted label :{}\nTrue label :{}".format(pred_errors[error], obs_errors[error]))

# Probability the model assigned to its (wrong) predicted class for each error.
Y_pred_errors_prob = np.max(Y_pred_errors, axis=1)

# Probability the model assigned to the true class for each error.
# Row/column integer indexing picks one value per row directly; the previous
# np.diagonal(np.take(...)) produced the same values via an n x n intermediate.
true_prob_errors = Y_pred_errors[np.arange(len(Y_true_errors)), Y_true_errors]

# Gap between the predicted-class probability and the true-class probability.
delta_pred_true_errors = Y_pred_errors_prob - true_prob_errors

# Indices of the errors ordered by increasing gap.
sorted_dela_errors = np.argsort(delta_pred_true_errors)

# The six errors with the largest gap.
most_important_errors = sorted_dela_errors[-6:]

# Plot those six errors with their predicted and true labels.
display_errors(most_important_errors, X_val_errors, Y_pred_classes_errors, Y_true_errors)

In [29]:

# Class probabilities for every test image.
test_probabilities = model.predict(test_data)

# Most probable digit per image, as a Series named for the submission column.
results = pd.Series(np.argmax(test_probabilities, axis=1), name="Label")

In [30]:
# Image ids are 1-based and must match the number of predictions. Deriving the
# range from `results` avoids silent misalignment (NaN-padded rows) if the test
# set does not contain exactly 28000 images.
image_ids = pd.Series(range(1, len(results) + 1), name="ImageId")
submission = pd.concat([image_ids, results], axis=1)

# Write the two-column (ImageId, Label) submission file without the index.
submission.to_csv("cnn_mnist_datagen.csv", index=False)