In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

# Accuracy achieved by this model: 0.99535 (rank 333 among 2500 teams)
# Submitted on Kaggle

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below the Kaggle input directory.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import itertools
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
from keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import matplotlib.image as img
import seaborn as sns
from sklearn.metrics import confusion_matrix
import tensorflow as tf
from keras.utils.np_utils import to_categorical
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Conv2D, MaxPool2D, Dense, Dropout, Flatten
from keras.optimizers import Adam, RMSprop


%matplotlib inline

# Seed NumPy's global random number generator so NumPy-driven randomness repeats across runs.
NUMPY_SEED = 2
np.random.seed(NUMPY_SEED)

In [None]:
# Read the Digit Recognizer training and test CSV files into DataFrames.
train_csv_path = '/kaggle/input/digit-recognizer/train.csv'
test_csv_path = '/kaggle/input/digit-recognizer/test.csv'
train = pd.read_csv(train_csv_path)
test = pd.read_csv(test_csv_path)


In [None]:
# Separate the labels from the pixel columns of the training frame.
Y_train = train["label"]
X_train = train.drop("label", axis=1)

# Normalisation: divide every pixel value by 255 (presumably mapping 0-255
# intensities into [0, 1] -- confirm against the dataset description).
# A fresh assignment is used instead of the in-place `/=` so the frame is
# never mutated behind a name that was already displayed.
X_train = X_train / 255.0
X_test = test / 255.0

# Debugging: shapes before reshaping.
print(Y_train.shape)
print(X_train.shape)

# Reshape every flat row into a 28x28 single-channel image.
# X_test is built from the *normalised* frame: reshaping the raw `test` frame
# here would silently discard the division above and feed the network inputs
# on a different scale from the one it was trained on.
X_train = X_train.values.reshape(-1, 28, 28, 1)
X_test = X_test.values.reshape(-1, 28, 28, 1)

# Seed reused by the train/validation split.
random_seed = 2

# Debugging: shapes and types after reshaping, plus the label distribution.
print(X_train.shape)
print(X_test.shape)
print(type(X_train))
print(type(X_test))
plt.hist(Y_train)
print(Y_train.value_counts())


In [None]:
# Encode labels as one-hot vectors (ex: 9 -> [0,0,0,0,0,0,0,0,0,1]).
NUM_CLASSES = 10
Y_train = to_categorical(Y_train, num_classes=NUM_CLASSES)


In [None]:
# Hold out 1% of the labelled images for validation; the fixed random_state
# makes the partition the same on every run.
split_parts = train_test_split(
    X_train,
    Y_train,
    test_size=0.01,
    random_state=random_seed,
)
X_train, X_val, Y_train, Y_val = split_parts


In [None]:
# CNN architecture, declared as a single layer list:
#   two 5x5 conv layers (32 filters) -> max-pool -> dropout
#   two 3x3 conv layers (64 filters) -> max-pool -> dropout
#   flatten -> dense 256 -> dropout -> dense 128 -> dropout -> 10-way softmax
model = Sequential([
    # Convolutional stage 1
    Conv2D(filters=32, kernel_size=(5,5), padding='same', activation='relu', input_shape=(28,28,1)),
    Conv2D(filters=32, kernel_size=(5,5), padding='same', activation='relu'),
    MaxPool2D(pool_size=(2,2)),
    Dropout(0.1),

    # Convolutional stage 2
    Conv2D(filters=64, kernel_size=(3,3), padding='same', activation='relu'),
    Conv2D(filters=64, kernel_size=(3,3), padding='same', activation='relu'),
    MaxPool2D(pool_size=(2,2), strides=(2,2)),
    Dropout(0.1),

    # Classifier head
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.3),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(10, activation='softmax'),
])


In [None]:
# Optimizer for gradient descent.
# `learning_rate` is the current argument name; the older `lr` alias is
# deprecated and rejected by recent Keras releases.
optimizer = Adam(learning_rate=0.001)
# Compile the model: categorical cross-entropy matches the one-hot labels,
# and accuracy is tracked so the callbacks/plots can read 'accuracy' and 'val_accuracy'.
model.compile(optimizer=optimizer, loss="categorical_crossentropy", metrics=["accuracy"])

In [None]:
# Training hyper-parameters.
epochs = 50
batch_size = 86

# Halve the learning rate whenever validation accuracy has not improved for
# 3 epochs, never going below 1e-5, so the optimizer takes smaller steps as it
# approaches a minimum.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.5,
    patience=3,
    min_lr=0.00001,
    verbose=1,
)

# Optional early stopping (disabled):
#early_stopping = EarlyStopping(monitor='val_acc', patience=2, verbose=1, min_delta=0.01)

In [None]:
# Data augmentation: small random rotations, zooms and shifts. Flips and all
# centering / normalisation / whitening options are explicitly switched off.
augmentation_options = dict(
    featurewise_center=False,             # set input mean to 0 over the dataset
    samplewise_center=False,              # set each sample mean to 0
    featurewise_std_normalization=False,  # divide inputs by std of the dataset
    samplewise_std_normalization=False,   # divide each input by its std
    zca_whitening=False,                  # apply ZCA whitening
    rotation_range=10,                    # random rotation range, in degrees
    zoom_range=0.1,                       # random zoom
    width_shift_range=0.1,                # random horizontal shift (fraction of total width)
    height_shift_range=0.1,               # random vertical shift (fraction of total height)
    horizontal_flip=False,                # no random horizontal flips
    vertical_flip=False,                  # no random vertical flips
)
datagen = ImageDataGenerator(**augmentation_options)

# NOTE(review): fit() presumably only matters when one of the featurewise /
# ZCA options above is enabled -- confirm before removing this call.
datagen.fit(X_train)


In [None]:
# Train on augmented batches and evaluate on the held-out validation set after
# every epoch; the returned History object is kept for the plots.
# NOTE(review): fit_generator is reported as deprecated in newer Keras/TensorFlow
# releases in favour of fit -- confirm against the installed version.
train_batches = datagen.flow(X_train, Y_train, batch_size=batch_size)
batches_per_epoch = X_train.shape[0] // batch_size
model_fit = model.fit_generator(
    train_batches,
    epochs=epochs,
    validation_data=(X_val, Y_val),
    verbose=2,
    steps_per_epoch=batches_per_epoch,
    callbacks=[learning_rate_reduction],
)



In [None]:
# Visualise training: loss curves on the top panel, accuracy curves on the bottom.
fig, ax = plt.subplots(2, 1)

ax[0].plot(model_fit.history['loss'], color='b', label='Training loss')
ax[0].plot(model_fit.history['val_loss'], color='r', label='Validation loss')
ax[0].set_title('Loss per epoch')
ax[0].set_ylabel('Loss')
ax[0].legend(loc='best', shadow=True)

ax[1].plot(model_fit.history['accuracy'], color='b', label="Training accuracy")
ax[1].plot(model_fit.history['val_accuracy'], color='r', label="Validation accuracy")
ax[1].set_title('Accuracy per epoch')
ax[1].set_xlabel('Epoch')
ax[1].set_ylabel('Accuracy')
ax[1].legend(loc='best', shadow=True)

# Keep the titles and labels of the two panels from overlapping.
fig.tight_layout()
plt.show()

In [None]:
# Inspect the validation errors: confusion matrix, then every misclassified digit.
import seaborn as sns

# Predicted class probabilities for the validation images (one row per image).
Y_pred = model.predict(X_val)
# Collapse probability rows / one-hot label rows into integer class indices.
Y_pred_classes = np.argmax(Y_pred, axis=1)
Y_true = np.argmax(Y_val, axis=1)

# Confusion matrix (rows: true class, columns: predicted class), drawn on its
# own figure so the digit images plotted below do not land on the same axes.
confusion_mtx = confusion_matrix(Y_true, Y_pred_classes)
fig, ax = plt.subplots()
sns.heatmap(confusion_mtx, annot=True, fmt='d', ax=ax)
ax.set_xlabel("Predicted label")
ax.set_ylabel("True label")
plt.show()

# Boolean mask selecting the misclassified validation samples.
errors = Y_pred_classes != Y_true
X_val_errors = X_val[errors]
Y_val_error = Y_true[errors]
Y_pred_errors = Y_pred[errors]
Y_pred_classes_errors = Y_pred_classes[errors]

# For each error: probability given to the (wrong) predicted class minus the
# probability given to the true class. Kept as a list, one entry per error.
error_rows = np.arange(len(Y_pred_errors))
X_error_diff = list(
    Y_pred_errors[error_rows, Y_pred_classes_errors]
    - Y_pred_errors[error_rows, Y_val_error]
)
print(errors.sum())

# Show each misclassified image with its predicted and actual label.
# Iterating over the error arrays is a no-op when there are no errors, and the
# image comes from X_val_errors (not X_val) so it matches the printed labels.
for gap, predicted, actual, image in zip(
        X_error_diff, Y_pred_classes_errors, Y_val_error, X_val_errors):
    print(gap)
    print("predicted value:", predicted)
    print("actual value:", actual)
    fig, ax = plt.subplots()
    ax.imshow(image.reshape((28, 28)))
    plt.show()
    plt.close(fig)  # release the figure; many errors would otherwise pile up in memory


In [None]:
# Predict class probabilities for the test images and keep the most likely
# class of each one as a Series named 'Label'.
test_probabilities = model.predict(X_test)
results = pd.Series(np.argmax(test_probabilities, axis=1), name='Label')

In [None]:
# Store the results in the output file.
# ImageId is 1-based and derived from the number of predictions, so the two
# columns always line up instead of relying on a hard-coded row count.
image_ids = pd.Series(range(1, len(results) + 1), name="ImageId")
submission = pd.concat([image_ids, results], axis=1)
submission.to_csv("Submission.csv", index=False)