## CNN feature extraction workflow:

1. image
2. filter(s) (several layers might be used to grasp all the images' complexity)
  * common filter size: 3x3
  * filter depth: n color channels ; 1 for grayscale - 3 for RGB
  * filter stride (displacement in pixels) - common: 2
3. feature map
4. activation function/layer (increase non-linearity) - typically ReLU (Rectified Linear Unit) (noise?)
5. pooling layer: downsampling of the feature map, which reduces computation and helps prevent overfitting
  * resulting map: to be defined (2x2, 3x3, ... ?)
  * pooling types: max, mean, sum. Max is the most commonly used
6. flattening
7. densely (fully) connected layers / ANN (artificial neural network): combine features into attributes used for classification
  * backpropagation is used to train the network by updating the neural weights that influence the next layer

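* 2-6: feature extraction, i.e. data preparation for the classifier (note that the filter weights are learned during training as well)
* 7: the classifier trained on the extracted features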


In [1]:
import numpy as np
import tensorflow as tf

# Fix the random seeds so that reruns of the notebook are as repeatable as
# possible. Seeding NumPy alone does not seed TensorFlow's own random ops
# (e.g. weight initialisation, dropout), so both global generators are seeded.
SEED = 0
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [2]:
# load full dataset
import pandas as pd

train = pd.read_csv("data/train.csv.zip")

In [3]:
# split X, y
# The "label" column is the target; every other column is treated as a pixel.
y = train["label"]
X = train.drop("label", axis = 1)

# data preprocessing
# Scale the pixel values by 1/255 (assumes 8-bit intensities in 0-255) and
# reshape each row into a 28x28 single-channel image: (n_samples, 28, 28, 1).
X = X / 255.0
X = X.to_numpy().reshape(-1, 28, 28, 1)

# One-hot encode the labels. The dtype is pinned because the default dtype of
# get_dummies depends on the pandas version (bool in pandas 2.x), while the
# categorical cross-entropy loss expects numeric targets.
y = pd.get_dummies(y, dtype = "float32")
n_classes = y.shape[1]

# data augmentation
from tensorflow import keras

# Randomly rotate (up to 10 degrees), shift (up to 10% of the height/width)
# and zoom (up to 10%) the images. validation_split reserves 20% of the data,
# which is selected later through the `subset` argument of `datagen.flow`.
datagen = keras.preprocessing.image.ImageDataGenerator(
    rotation_range = 10, height_shift_range = 0.1, width_shift_range = 0.1,
    zoom_range = 0.1,
    validation_split = 0.2
)
# NOTE: datagen.fit(X) is deliberately not called. It is only required when
# featurewise_center, featurewise_std_normalization or zca_whitening is
# enabled; none of them is, so it would only copy the dataset for nothing.

In [4]:
# model architecture
# Five 128-filter convolution layers (two 5x5, then three 3x3), each followed
# by batch normalisation, with three max-pooling + dropout stages in between,
# then one dense hidden layer and a softmax output over the classes.

model = keras.Sequential()

# 1st CNN layer (input shape is one sample of X, without the batch axis)
model.add(keras.layers.Conv2D(128, 5, input_shape = X.shape[1:],
                              activation = "relu", padding = "same"))
# Normalise outputs from this layer / inputs of the next layer
model.add(keras.layers.BatchNormalization())

# 2nd CNN layer
model.add(keras.layers.Conv2D(128, 5, activation = "relu", padding = "same"))
model.add(keras.layers.BatchNormalization())
# Downsample by 2, then drop 20% of the activations to limit overfitting
model.add(keras.layers.MaxPooling2D(2))
model.add(keras.layers.Dropout(0.2))

# 3rd CNN layer
model.add(keras.layers.Conv2D(128, 3, activation = "relu", padding = "same"))
model.add(keras.layers.BatchNormalization())

# 4th CNN layer
model.add(keras.layers.Conv2D(128, 3, activation = "relu", padding = "same"))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.MaxPooling2D(2))
model.add(keras.layers.Dropout(0.2))

# 5th CNN layer
model.add(keras.layers.Conv2D(128, 3, activation = "relu", padding = "same"))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.MaxPooling2D(2))
model.add(keras.layers.Dropout(0.2))

# ANN
# 1st layer: flatten the feature maps and feed them to a dense hidden layer
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(256, activation = "relu"))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Dropout(0.2))
# Output layer: one softmax probability per class
model.add(keras.layers.Dense(n_classes, activation = "softmax"))

# `metrics` is passed as a list, as documented for Model.compile; recent
# Keras versions reject a bare string here.
model.compile(loss = "categorical_crossentropy",
              optimizer = keras.optimizers.Adam(learning_rate = 0.001),
              metrics = ["categorical_accuracy"])

In [5]:
# summary() prints the layer table itself and returns None, so it must not be
# wrapped in print() (that would add a stray "None" line to the output).
model.summary()

In [6]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Stop training once the validation accuracy has not improved for 10 epochs
# and restore the weights of the best epoch.
early_stopping = EarlyStopping(monitor = "val_categorical_accuracy",
                               patience = 10, restore_best_weights = True)
# Halve the learning rate after 3 epochs without improvement, down to 1e-5.
lr_reduction = ReduceLROnPlateau(monitor = "val_categorical_accuracy",
                                 patience = 3, verbose = 1, factor = 0.5,
                                 min_lr = 0.00001)

batch_size = 128

# `datagen` applies its random transforms to whatever it yields, including its
# "validation" subset, which makes validation scores noisy and not
# representative of real images. A second generator with the same
# validation_split (it must match the value used for `datagen`) but no
# augmentation serves the held-out images unmodified and in a fixed order.
val_datagen = keras.preprocessing.image.ImageDataGenerator(validation_split = 0.2)
val_flow = val_datagen.flow(X, y, batch_size = batch_size,
                            subset = "validation", shuffle = False)

# The original run (validated on augmented images) reported ~99.5% acc
history = model.fit(
    datagen.flow(X, y, batch_size = batch_size, subset = "training"),
    validation_data = val_flow,
    epochs = 100, callbacks = [early_stopping, lr_reduction])

# Model evaluation on the same, non-augmented held-out images.
# scores[0] is the loss, scores[1] the categorical accuracy.
scores = model.evaluate(val_flow, verbose = 0)
print(f"Accuracy: {scores[1] * 100:.2f}%")

In [10]:
import matplotlib.pyplot as plt

# Plot the per-epoch training and validation accuracy recorded by model.fit.
ax = plt.subplot()
for metric_key, curve_label in (("categorical_accuracy", "training"),
                                ("val_categorical_accuracy", "validation")):
    ax.plot(history.history[metric_key], label = curve_label)
ax.set_xlabel("Epoch")
ax.set_ylabel("Accuracy")
ax.legend()

plt.show()

In [12]:
# Load the test set and apply the same preprocessing as for the training
# pixels: scale by 1/255 and reshape every row to a 28x28x1 image.
test = pd.read_csv("data/test.csv.zip").to_numpy()
test = (test / 255.0).reshape(-1, 28, 28, 1)

In [16]:
# One row of softmax scores per test image, one column per class.
class_scores = model.predict(test)
# Keep the index of the highest-scoring class as the predicted label.
results = pd.Series(np.argmax(class_scores, axis = 1), name = "Label")

In [22]:
# Build the submission table: a 1-based image id next to each predicted label,
# written without the DataFrame index.
image_ids = pd.Series(range(1, len(results) + 1), name = "ImageId")
submission = pd.concat([image_ids, results], axis = 1)
submission.to_csv("data/submission_cnn.csv", index = False)