# MNIST: Digit Recognizer Getting Started Challenge

*Author: Benjamin Sautermeister*

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
import tensorflow as tf
import pandas as pd
import numpy as np

### Load Data

In [None]:
# Read the training CSV into a DataFrame.
train = pd.read_csv(filepath_or_buffer='inputs/train.csv')
# Last expression of the cell: shows (rows, columns) in the notebook.
train.shape

In [None]:
# Read the test CSV into a DataFrame.
test = pd.read_csv(filepath_or_buffer='inputs/test.csv')
# Last expression of the cell: shows (rows, columns) in the notebook.
test.shape

#### Convert the image data to NumPy arrays of type *float32* and split the labels off the training data

In [None]:
# Column 0 of the training table is used as the label; all remaining
# columns are used as pixel values. The test table is used in full.
pixel_frame = train.iloc[:, 1:]
label_series = train.iloc[:, 0]

x_train = pixel_frame.values.astype(np.float32)
y_train = label_series.values.astype(np.int32)
x_test = test.values.astype(np.float32)

In [None]:
# Turn each flat pixel row into a 28x28 single-channel image, giving
# arrays of shape (samples, 28, 28, 1).
image_shape = (28, 28, 1)
x_train = x_train.reshape((len(x_train),) + image_shape)
x_test = x_test.reshape((len(x_test),) + image_shape)

### Data Visualization

In [None]:
# Draw training samples 6, 7 and 8 in slots 7, 8 and 9 (the bottom row)
# of a 3x3 subplot grid, using each sample's label as the title.
for grid_slot, sample_idx in zip((7, 8, 9), (6, 7, 8)):
    plt.subplot(3, 3, grid_slot)
    plt.imshow(x_train[sample_idx, :, :, 0], cmap='gray')
    plt.title(y_train[sample_idx])

### Data Preprocessing

In [None]:
# Scalar mean and standard deviation over every value in x_train,
# stored as float32 so they match the dtype of the image arrays.
mean_x = np.float32(x_train.mean())
std_x = np.float32(x_train.std())

def standardize(x):
    """Shift `x` by the global mean and scale it by the global std.

    Uses the module-level `mean_x` and `std_x`; `x` can be anything that
    supports `-` and `/` with those scalars.
    """
    centered = x - mean_x
    return centered / std_x

In [None]:
# One-hot encode the integer labels; the width of the encoded matrix is
# the number of classes.
y_train = tf.keras.utils.to_categorical(y_train)
num_classes = y_train.shape[-1]
# Last expression of the cell: shows the class count in the notebook.
num_classes

### Data splits

In [None]:
# Fixed seed for NumPy's global random generator; the same value is
# passed to the train/validation split as `random_state`.
seed = 42
np.random.seed(seed=seed)

In [None]:
# Keep references to the complete training set (used later to retrain
# on all data) before x_train / y_train are rebound to the 90% split.
inputs, label = x_train, y_train

x_train, x_val, y_train, y_val = train_test_split(
    inputs,
    label,
    test_size=0.10,
    random_state=seed,
)

### Model

In [None]:
def create_model():
    """Build and compile the CNN used for digit classification.

    Architecture: input standardization (Lambda over `standardize`), two
    Conv-ReLU-BatchNorm blocks with 32 filters, max pooling, two such
    blocks with 64 filters, max pooling, then Flatten, Dense(512)-ReLU-
    BatchNorm-Dropout(0.5) and a 10-way softmax output.

    Returns:
        A compiled `tf.keras` Sequential model (Adam, lr=0.005,
        categorical cross-entropy loss, accuracy metric).
    """
    layers = tf.keras.layers

    def conv_block(filters):
        # Conv -> ReLU -> BatchNorm, kept as three separate layers so the
        # layer sequence is the same as before.
        return [
            layers.Conv2D(filters, (3, 3)),
            layers.Activation('relu'),
            # The input is declared as (28, 28, 1), i.e. channels-last
            # (assuming the default Keras image_data_format), so the
            # feature axis is the last one. The previous axis=1 normalized
            # per image row instead of per channel.
            # NOTE: this changes the BatchNorm weight shapes, so weights
            # checkpointed with the old axis=1 layout will not load.
            layers.BatchNormalization(axis=-1),
        ]

    classifier_head = [
        layers.Flatten(),
        layers.Dense(512),
        layers.Activation('relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.5),
        layers.Dense(10, activation='softmax'),
    ]

    model = tf.keras.models.Sequential(
        [layers.Lambda(standardize, input_shape=(28, 28, 1))]
        + conv_block(32)
        + conv_block(32)
        + [layers.MaxPooling2D()]
        + conv_block(64)
        + conv_block(64)
        + [layers.MaxPooling2D()]
        + classifier_head
    )

    optimizer = tf.keras.optimizers.Adam(lr=0.005)
    model.compile(optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
# Random augmentation settings: rotation, horizontal/vertical shift,
# shear and zoom.
augmentation_options = dict(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.3,
    zoom_range=0.1,
)
gen = tf.keras.preprocessing.image.ImageDataGenerator(**augmentation_options)

In [None]:
# Training batches are drawn through the augmenting generator `gen`.
batches = gen.flow(x_train, y_train, batch_size=100)

# Validation batches come from a generator with no transformations, so the
# validation metrics (which drive LR reduction, early stopping and
# checkpoint selection) are measured on the original images rather than on
# randomly distorted ones. Shuffling is unnecessary for evaluation.
val_gen = tf.keras.preprocessing.image.ImageDataGenerator()
val_batches = val_gen.flow(x_val, y_val, batch_size=100, shuffle=False)

In [None]:
model = create_model()

# Halve the learning rate (down to at most 1e-5) when validation accuracy
# has not improved for 3 epochs.
reduce_lr_callback = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_acc', 
    patience=3, 
    verbose=1, 
    factor=0.5, 
    min_lr=0.00001)

# Stop training when validation accuracy has not improved by at least
# 1e-4 for 5 epochs.
early_stopping_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_acc',
    min_delta=0.0001,
    patience=5,
    verbose=1,
    mode='auto',
    baseline=None
)

# Save weights only, and only when validation accuracy improves.
checkpoint_model_selection_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath='checkpoints/model_selection/ckp',
    monitor='val_acc',
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
    period=1)

# steps_per_epoch / validation_steps are counted in batches, not samples.
# len(iterator) is the number of batches in one pass over the data, whereas
# `.n` is the sample count and made every "epoch" iterate over the data
# roughly batch_size times.
history = model.fit_generator(
    generator=batches,
    steps_per_epoch=len(batches),
    epochs=25,
    verbose=2,
    validation_data=val_batches,
    validation_steps=len(val_batches),
    callbacks=[
        checkpoint_model_selection_callback,
        reduce_lr_callback,
        early_stopping_callback
    ])

In [None]:
def plot_values(train_values, valid_values, y_label):
    """Plot a per-epoch metric for training and, optionally, validation.

    Args:
        train_values: Sequence with one value per epoch; drawn in blue.
        valid_values: Sequence of the same length drawn in green, or
            None to plot the training curve only.
        y_label: Text for the y axis.
    """
    epoch_axis = range(1, len(train_values) + 1)

    # (series, matplotlib format string) pairs, in drawing order.
    curves = [(train_values, 'b')]
    if valid_values is not None:
        curves.append((valid_values, 'g'))

    plt.clf()
    for series, line_format in curves:
        plt.plot(epoch_axis, series, line_format)
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.show()

# Per-epoch metrics recorded by the training run held in `history`.
history_dict = history.history

In [None]:
# Training and validation loss per epoch.
loss_values, val_loss_values = history_dict['loss'], history_dict['val_loss']
plot_values(train_values=loss_values, valid_values=val_loss_values, y_label='Loss')

In [None]:
# Training and validation accuracy per epoch.
acc_values, val_acc_values = history_dict['acc'], history_dict['val_acc']
plot_values(train_values=acc_values, valid_values=val_acc_values, y_label='Accuracy')

### Submission

#### Option A: Retrain on full training set

In [None]:
model = create_model()

# Save weights only, and only when training accuracy improves.
checkpoint_train_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath='checkpoints/train/ckp',
    monitor='acc',
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
    period=1)

# Halve the learning rate (down to at most 1e-5) when training accuracy
# has not improved for 2 epochs.
reduce_lr_callback = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='acc', 
    patience=2, 
    verbose=1, 
    factor=0.5, 
    min_lr=0.00001)

# Stop when training accuracy has not improved by at least 1e-4 for 2 epochs.
# NOTE(review): this patience equals the LR-reduction patience above, so a
# reduced learning rate presumably never gets an epoch to take effect before
# training stops -- confirm whether that is intended.
early_stopping_callback = tf.keras.callbacks.EarlyStopping(
    monitor='acc',
    min_delta=0.0001,
    patience=2,
    verbose=1,
    mode='auto',
    baseline=None
)

# No validation split here: the augmented batches cover the full training set.
batches = gen.flow(inputs, label, batch_size=100)

# steps_per_epoch is counted in batches, not samples. len(batches) is the
# number of batches in one pass over the data, whereas `batches.n` is the
# sample count and made every "epoch" iterate over the data roughly
# batch_size times.
history = model.fit_generator(
    generator=batches,
    steps_per_epoch=len(batches),
    epochs=10,
    verbose=2,
    callbacks=[
        checkpoint_train_callback,
        reduce_lr_callback,
        early_stopping_callback
    ])

#### Option B: Load the best checkpoint

In [None]:
# Rebuild the network and restore the most recently written weights from
# the full-training checkpoint directory.
model = create_model()
latest = tf.train.latest_checkpoint('checkpoints/train')
if latest is None:
    # latest_checkpoint returns None when the directory holds no checkpoint;
    # fail with a clear message instead of passing None to load_weights.
    raise FileNotFoundError(
        "No checkpoint found in 'checkpoints/train'; "
        "run the full-training cell (Option A) first.")
model.load_weights(latest)

In [None]:
# Loss per epoch for the run currently held in `history`; there is no
# validation series to draw.
history_dict = history.history
loss_values = history_dict['loss']
plot_values(train_values=loss_values, valid_values=None, y_label='Loss')

In [None]:
# Accuracy per epoch from `history_dict`; there is no validation series to draw.
acc_values = history_dict['acc']
plot_values(train_values=acc_values, valid_values=None, y_label='Accuracy')

In [None]:
# Predicted digit = index of the highest softmax output for each test image.
# This is what Sequential.predict_classes computed for a multi-class output;
# it is spelled out with predict + argmax because predict_classes is
# deprecated and absent from newer TensorFlow releases.
class_probabilities = model.predict(x_test, verbose=0)
predictions = np.argmax(class_probabilities, axis=-1)

# Two-column submission table with 1-based image ids in test-set row order.
submissions = pd.DataFrame({
    "ImageId": np.arange(1, len(predictions) + 1),
    "Label": predictions,
})
submissions.to_csv("submission.csv", index=False, header=True)