# Digit recognizer
_2019.09.05_

[Introduction to CNN Keras - 0.997 (top 6%)](https://www.kaggle.com/yassineghouzam/introduction-to-cnn-keras-0-997-top-6)

In [1]:
import glob

import numpy as np
import pandas as pd

from tqdm import tqdm
from tqdm import tqdm_notebook

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

import itertools
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

from keras.models import Sequential
from keras.optimizers import RMSprop
from keras.callbacks import ReduceLROnPlateau
from keras.utils.np_utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D

%matplotlib inline

np.random.seed(2)

sns.set(style='white', context='notebook', palette='deep')

Using TensorFlow backend.


In [2]:
def reduce_memory_usage(df, verbose=True):
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    start_mem = df.memory_usage().sum() / 1024**2
    if verbose:
        print('Starting memory usage: {:5.2f} MB'.format(start_mem))
        for col in df.columns:
            col_type = df[col].dtypes
            if col_type in numerics:
                c_min = df[col].min()
                c_max = df[col].max()
                if str(col_type)[:3] == 'int':
                    if c_min >= np.iinfo(np.int8).min and c_max <= np.iinfo(np.int8).max:
                        df[col] = df[col].astype(np.int8)
                    elif c_min >= np.iinfo(np.int16).min and c_max <= np.iinfo(np.int16).max:
                        df[col] = df[col].astype(np.int16)
                    elif c_min >= np.iinfo(np.int32).min and c_max <= np.iinfo(np.int32).max:
                        df[col] = df[col].astype(np.int32)
                    elif c_min >= np.iinfo(np.int64).min and c_max <= np.iinfo(np.int64).max:
                        df[col] = df[col].astype(np.int64)
                else:
                    if c_min >= np.finfo(np.float16).min and c_max <= np.finfo(np.float16).max:
                        df[col] = df[col].astype(np.float16)
                    elif c_min >= np.finfo(np.float32).min and c_max <= np.finfo(np.float32).max:
                        df[col] = df[col].astype(np.float32)
                    else:
                        df[col] = df[col].astype(np.float64)
        end_mem = df.memory_usage().sum() / 1024**2
        if verbose: print('Reduced memory usage: {:5.2f} MB ({:.1f}% reduction)'.format(end_mem, 100 * (start_mem - end_mem) / start_mem))
        return df

## Loading data

In [3]:
df_train = pd.read_csv("input/train.csv")
df_train = reduce_memory_usage(df_train)

Starting memory usage: 251.00 MB
Reduced memory usage: 58.00 MB (76.0% reduction)


In [4]:
df_test = pd.read_csv("input/test.csv")
df_test = reduce_memory_usage(df_test)

Starting memory usage: 167.00 MB
Reduced memory usage: 38.00 MB (77.0% reduction)


In [6]:
y_train = df_train["label"]

x_train = df_train.drop(labels = ["label"], axis = 1) 

del df_train 

g = sns.countplot(y_train)

y_train.value_counts()

1    4684
7    4401
3    4351
9    4188
2    4177
6    4137
0    4132
4    4072
8    4063
5    3795
Name: label, dtype: int64

AttributeError: 'module' object has no attribute 'to_rgba'

## Checking for missing data

In [7]:
x_train.isnull().any().describe()

count       784
unique        1
top       False
freq        784
dtype: object

In [8]:
df_test.isnull().any().describe()

count       784
unique        1
top       False
freq        784
dtype: object

In [9]:
x_train = x_train / 255.0
df_test = df_test / 255.0

In [10]:
x_train = x_train.values.reshape(-1, 28, 28, 1)
df_test = df_test.values.reshape(-1, 28, 28, 1)

In [11]:
y_train = to_categorical(y_train, num_classes = 10)

In [12]:
random_seed = 2

In [13]:
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size = 0.1, random_state=random_seed)

In [15]:
g = plt.imshow(x_train[0][:, :, 0])

AttributeError: 'module' object has no attribute 'to_rgba'

## CNN

In [16]:
model = Sequential()

model.add(Conv2D(filters = 32, kernel_size = (5,5),padding = 'Same', activation ='relu', input_shape = (28,28,1)))
model.add(Conv2D(filters = 32, kernel_size = (5,5),padding = 'Same', activation ='relu'))
model.add(MaxPool2D(pool_size=(2,2)))
model.add(Dropout(0.25))

model.add(Conv2D(filters = 64, kernel_size = (3,3),padding = 'Same', activation ='relu'))
model.add(Conv2D(filters = 64, kernel_size = (3,3),padding = 'Same', activation ='relu'))
model.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(256, activation = "relu"))
model.add(Dropout(0.5))
model.add(Dense(10, activation = "softmax"))

W0905 10:03:06.508001 140529885713664 deprecation_wrapper.py:119] From /opt/conda/lib/python2.7/site-packages/keras/backend/tensorflow_backend.py:66: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0905 10:03:06.548104 140529885713664 deprecation_wrapper.py:119] From /opt/conda/lib/python2.7/site-packages/keras/backend/tensorflow_backend.py:541: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0905 10:03:06.567570 140529885713664 deprecation_wrapper.py:119] From /opt/conda/lib/python2.7/site-packages/keras/backend/tensorflow_backend.py:4432: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0905 10:03:06.613586 140529885713664 deprecation_wrapper.py:119] From /opt/conda/lib/python2.7/site-packages/keras/backend/tensorflow_backend.py:4267: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.

W0905 10:03:06.619076 140529885713664 deprecation_wrapp

In [18]:
optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)

In [19]:
model.compile(optimizer = optimizer, loss = "categorical_crossentropy", metrics=["accuracy"])

W0905 10:04:06.709748 140529885713664 deprecation_wrapper.py:119] From /opt/conda/lib/python2.7/site-packages/keras/optimizers.py:793: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

W0905 10:04:06.727571 140529885713664 deprecation_wrapper.py:119] From /opt/conda/lib/python2.7/site-packages/keras/backend/tensorflow_backend.py:3576: The name tf.log is deprecated. Please use tf.math.log instead.



In [17]:
learning_rate_reduction = ReduceLROnPlateau(monitor='val_acc', patience=3, verbose=1, factor=0.5, min_lr=0.00001)

In [20]:
epochs = 30 # Turn epochs to 30 to get 0.9967 accuracy
batch_size = 86

In [21]:
datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=10,  # randomly rotate images in the range (degrees, 0 to 180)
        zoom_range = 0.1, # Randomly zoom image 
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=False,  # randomly flip images
        vertical_flip=False)  # randomly flip images

datagen.fit(x_train)

In [22]:
history = model.fit_generator(
    datagen.flow(x_train, y_train, batch_size=batch_size),
    epochs = epochs,
    validation_data = (x_val, y_val),
    verbose = 2,
    steps_per_epoch=x_train.shape[0] // batch_size,
    callbacks=[learning_rate_reduction]
)

W0905 10:05:41.515866 140529885713664 deprecation.py:323] From /opt/conda/lib/python2.7/site-packages/tensorflow/python/ops/math_grad.py:1250: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Epoch 1/30
 - 135s - loss: 0.4185 - acc: 0.8648 - val_loss: 0.0638 - val_acc: 0.9826
Epoch 2/30
 - 133s - loss: 0.1376 - acc: 0.9603 - val_loss: 0.0447 - val_acc: 0.9871
Epoch 3/30
 - 132s - loss: 0.1021 - acc: 0.9714 - val_loss: 0.0394 - val_acc: 0.9888
Epoch 4/30
 - 127s - loss: 0.0895 - acc: 0.9747 - val_loss: 0.0308 - val_acc: 0.9907
Epoch 5/30
 - 131s - loss: 0.0836 - acc: 0.9752 - val_loss: 0.0322 - val_acc: 0.9905
Epoch 6/30
 - 130s - loss: 0.0774 - acc: 0.9785 - val_loss: 0.0300 - val_acc: 0.9931
Epoch 7/30
 - 134s - loss: 0.0738 - acc: 0.9791 - val_loss: 0.0294 - val_acc: 0.9914
Epoch 8/30
 - 128s - loss: 0.0744 - acc: 0.9785 - val_loss: 0.0352 - val_acc: 0.9905
Epoch 9/30
 - 132s - loss: 0.0807 - acc: 0.9796 - val_loss: 0.0466 - val_acc: 0.9874

Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.000500000023749.
Epoch 10/30
 - 129s - loss: 0.0600 - acc: 0.9836 - val_loss: 0.0250 - val_acc: 0.9931
Epoch 11/30
 - 128s - loss: 0.0585 - acc: 0.9848 - val_loss: 0.0338 - va

In [23]:
fig, ax = plt.subplots(2,1)
ax[0].plot(history.history['loss'], color='b', label="Training loss")
ax[0].plot(history.history['val_loss'], color='r', label="validation loss",axes =ax[0])
legend = ax[0].legend(loc='best', shadow=True)

ax[1].plot(history.history['acc'], color='b', label="Training accuracy")
ax[1].plot(history.history['val_acc'], color='r',label="Validation accuracy")
legend = ax[1].legend(loc='best', shadow=True)

AttributeError: 'module' object has no attribute 'to_rgba'

In [24]:
def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

# Predict the values from the validation dataset
y_pred = model.predict(x_val)

# Convert predictions classes to one hot vectors 
y_pred_classes = np.argmax(y_pred, axis = 1) 

# Convert validation observations to one hot vectors
y_true = np.argmax(y_val, axis = 1) 

# compute the confusion matrix
confusion_mtx = confusion_matrix(y_true, y_pred_classes) 

# plot the confusion matrix
plot_confusion_matrix(confusion_mtx, classes = range(10)) 

AttributeError: 'module' object has no attribute 'to_rgba'

In [25]:
errors = (y_pred_classes - y_true != 0)

y_pred_classes_errors = y_pred_classes[errors]
y_pred_errors = y_pred[errors]
y_true_errors = y_true[errors]
x_val_errors = x_val[errors]

def display_errors(errors_index, img_errors, pred_errors, obs_errors):
    """ This function shows 6 images with their predicted and real labels"""
    n = 0
    nrows = 2
    ncols = 3
    fig, ax = plt.subplots(nrows, ncols, sharex=True, sharey=True)
    for row in range(nrows):
        for col in range(ncols):
            error = errors_index[n]
            ax[row,col].imshow((img_errors[error]).reshape((28,28)))
            ax[row,col].set_title("Predicted label :{}\nTrue label :{}".format(pred_errors[error], obs_errors[error]))
            n += 1

# Probabilities of the wrong predicted numbers
y_pred_errors_prob = np.max(y_pred_errors, axis = 1)

# Predicted probabilities of the true values in the error set
true_prob_errors = np.diagonal(np.take(y_pred_errors, y_true_errors, axis=1))

# Difference between the probability of the predicted label and the true label
delta_pred_true_errors = y_pred_errors_prob - true_prob_errors

# Sorted list of the delta prob errors
sorted_dela_errors = np.argsort(delta_pred_true_errors)

# Top 6 errors 
most_important_errors = sorted_dela_errors[-6:]

# Show the top 6 errors
display_errors(most_important_errors, x_val_errors, y_pred_classes_errors, y_true_errors)

AttributeError: 'module' object has no attribute 'to_rgba'

In [27]:
# predict results
results = model.predict(df_test)

# select the indix with the maximum probability
results = np.argmax(results, axis = 1)
results = pd.Series(results, name="Label")

In [38]:
submission = pd.concat([pd.Series(range(1, 28001), name = "ImageId"), results], axis = 1)
submission.to_csv("output/mnist_recognizer_{:.6f}.csv".format(float(history.history['val_acc'][-1:][0])), index=False)
print('Saved to `output/mnist_recognizer_{:.6f}.csv`'.format(float(history.history['val_acc'][-1:][0])))

Saved to `output/mnist_recognizer_0.994762.csv`
