In [None]:
import functools
import shutil
import tempfile
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import sklearn.decomposition
import sklearn.metrics
import sklearn.model_selection
import sklearn.neural_network
import sklearn.pipeline

import keras
import tensorflow as tf

In [None]:
# Read the labelled training set. The "label" column is the target and all
# remaining columns are used as input features.
train_df = pd.read_csv("../input/train.csv")

# One-hot encode the labels; divide every feature column by 255.
target = pd.get_dummies(train_df["label"])
data = train_df.drop(columns="label").div(255)

# The raw frame is not needed once features and labels are split out.
del train_df

In [None]:
def time_it(wrapped):
    """Decorate ``wrapped`` so that every call prints how long it took.

    The elapsed time is printed in seconds, or in minutes once it exceeds
    60 seconds. The message is printed only after ``wrapped`` returns; if it
    raises, the exception propagates and nothing is printed.

    Parameters
    ----------
    wrapped : callable
        Function or bound method to time. Its ``__name__`` is used in the
        printed message.

    Returns
    -------
    callable
        Wrapper that forwards all positional and keyword arguments to
        ``wrapped`` and returns its result unchanged.
    """
    @functools.wraps(wrapped)
    def wrapper(*args, **kwargs):
        # perf_counter() is monotonic: unlike time.time() it cannot jump
        # (or go backwards) when the system clock is adjusted mid-run.
        start_time = time.perf_counter()
        result = wrapped(*args, **kwargs)
        duration = time.perf_counter() - start_time

        # Switch the unit to minutes for long-running calls.
        min_or_sec = "sec"
        if duration > 60:
            duration /= 60
            min_or_sec = "min"
        print(f"{wrapped.__name__!r} done in {duration:.2f} {min_or_sec}.!")

        return result
    return wrapper

### MLP with Scikit-Learn

In [None]:
# Stage 1: reduce the input features to 100 principal components.
pca = sklearn.decomposition.PCA(random_state=200, n_components=100)

# Stage 2: multi-layer perceptron with two hidden layers of 200 units each.
mlp = sklearn.neural_network.MLPClassifier(
    # Architecture
    hidden_layer_sizes=(200, 200),
    # Adam solver and its moment / stability constants
    solver="adam",
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-8,
    # Iteration budget and convergence criteria
    max_iter=200,
    tol=1e-4,
    n_iter_no_change=5,
    shuffle=True,
    # Early stopping, with 30% of the fitting data held out for validation
    early_stopping=True,
    validation_fraction=0.3,
    # Reproducibility and progress logging
    random_state=200,
    verbose=1,
)

# Step caching is left disabled. To enable it, create a directory with
# tempfile.mkdtemp() and pass it to the pipeline as ``memory=<that directory>``.
pipe = sklearn.pipeline.Pipeline(steps=[("pca", pca), ("mlp", mlp)])

In [None]:
# Candidate hyper-parameters, addressed as "<pipeline step>__<parameter>".
# 2 * 5 * 2 * 2 = 40 combinations in total.
param_grid = dict(
    pca__whiten=[False, True],
    mlp__alpha=np.power(10.0, -np.arange(1, 6)),  # 1e-1 ... 1e-5
    mlp__batch_size=[200, 500],
    mlp__learning_rate_init=[0.001, 0.005],
)

# Three stratified random 70/30 splits per candidate.
cv = sklearn.model_selection.StratifiedShuffleSplit(
    random_state=200,
    test_size=0.3,
    n_splits=3,
)

# NOTE(review): `iid` is only accepted by older scikit-learn releases --
# confirm against the installed version before re-running this cell.
grid = sklearn.model_selection.GridSearchCV(
    pipe,
    param_grid,
    cv=cv,
    scoring="accuracy",
    iid=False,
    return_train_score=True,
    error_score=np.nan,
    n_jobs=-1,
    verbose=3,
)

# The search itself is left commented out; log of an earlier run follows.
# grid = grid.fit(data, target)
# Fitting 3 folds for each of 40 candidates, totalling 120 fits
# [Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
# [Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed: 12.7min
# [Parallel(n_jobs=-1)]: Done 120 out of 120 | elapsed: 49.9min finished

In [None]:
# Hyper-parameters of the best estimator from the grid search
# (grid.best_estimator_).
# Mean test score: 0.96545 (+/-0.00199)
pipe.set_params(
    pca__whiten=False,
    mlp__alpha=0.01,
    mlp__batch_size=200,
    mlp__learning_rate_init=0.005,
    mlp__verbose=1,
)

# Fit on all of `data` / `target` and print how long the fit took; `pipe` is
# rebound to whatever fit() returns.
pipe = time_it(pipe.fit)(data, target)

# If pipeline caching gets enabled, clean up its directory here:
# shutil.rmtree()

### ConvNet with Keras (TensorFlow Backend)

In [None]:
# Shape of one input sample as fed to the first convolution layer
# (presumably height, width, channels -- confirm against the Keras
# image data format in use).
INPUT_SHAPE = (28, 28, 1)

# Optimizer settings.
LEARNING_RATE = 4e-3
EPSILON = 1e-8

# Training-loop settings.
BATCH_SIZE = 256
EPOCHS = 64

In [None]:
# One stratified random split: 70% of the rows for training, 30% held out.
splitter = sklearn.model_selection.StratifiedShuffleSplit(
    n_splits=1,
    test_size=0.3,
    random_state=200,
)
# split() returns a generator; with n_splits=1 the first item is the only
# (train indices, test indices) pair. Use next() rather than calling the
# __next__ dunder directly.
train_idx, test_idx = next(splitter.split(data, target))

# Reshape every flat feature row into INPUT_SHAPE, giving a 4-D array of
# shape (n_samples,) + INPUT_SHAPE.
data_4d = np.array(data).reshape((data.shape[0],) + INPUT_SHAPE)

# Positional indexing on both sides keeps features and one-hot labels aligned.
data_train_4d, data_test_4d = data_4d[train_idx], data_4d[test_idx]
target_train, target_test = target.iloc[train_idx], target.iloc[test_idx]

In [None]:
# Random image augmentation: small rotations, zooms and shifts along both
# image axes.
datagen = keras.preprocessing.image.ImageDataGenerator(
    zoom_range=0.1,
    rotation_range=10,
    height_shift_range=0.1,
    width_shift_range=0.1,
)

In [None]:
# Two convolution blocks followed by a dense classifier head with a
# 10-way softmax output.
cnn_model = keras.models.Sequential([
    # Block 1: 5x5 convolution (32 filters) -> ReLU -> 2x2 max-pool -> dropout.
    keras.layers.Conv2D(
        32, 5, strides=1, padding="valid", input_shape=INPUT_SHAPE,
    ),
    # A BatchNormalization layer is left disabled at this position.
    keras.layers.Activation("relu"),
    keras.layers.MaxPooling2D(pool_size=2, strides=2),
    keras.layers.Dropout(0.25),

    # Block 2: 5x5 convolution (64 filters) -> ReLU -> 2x2 max-pool -> dropout.
    keras.layers.Conv2D(64, 5, strides=1, padding="valid"),
    # A BatchNormalization layer is left disabled at this position.
    keras.layers.Activation("relu"),
    keras.layers.MaxPooling2D(pool_size=2, strides=2),
    keras.layers.Dropout(0.25),

    # Head: flatten -> dense(128) -> batch norm -> ReLU -> dropout -> softmax.
    keras.layers.Flatten(),
    keras.layers.Dense(128),
    keras.layers.BatchNormalization(),
    keras.layers.Activation("relu"),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(10, activation="softmax"),
])

In [None]:
# Print the layer-by-layer summary of cnn_model.
cnn_model.summary()

In [None]:
# Adam optimizer configured from the LEARNING_RATE / EPSILON constants.
optimizer = keras.optimizers.Adam(epsilon=EPSILON, lr=LEARNING_RATE)

# Categorical cross-entropy loss (the network ends in a softmax layer);
# accuracy is tracked as an additional metric.
cnn_model.compile(
    loss="categorical_crossentropy",
    optimizer=optimizer,
    metrics=["accuracy"],
)

In [None]:
# Halve the learning rate after 3 epochs without a val_loss improvement of at
# least 1e-6, but never go below 1.25e-4.
lr_reduction = keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss",
    patience=3,
    factor=0.5,
    min_delta=1e-6,
    min_lr=1.25e-4,
    verbose=2,
)

# Stop training once val_loss has not improved for 8 epochs.
early_stopping = keras.callbacks.EarlyStopping(
    monitor="val_loss", patience=8, verbose=1,
)

In [None]:
# Stream augmented mini-batches of BATCH_SIZE samples from the training split.
train_data_generator = datagen.flow(
    data_train_4d,
    target_train,
    batch_size=BATCH_SIZE,
)

# Train for at most EPOCHS epochs (the callbacks may lower the learning rate or
# stop earlier) and print the total training time when done.
history = time_it(cnn_model.fit_generator)(
    generator=train_data_generator,
    epochs=EPOCHS,
    verbose=1,
    # Keras documents validation_data as a tuple (x_val, y_val); a list can be
    # interpreted as a multi-input `x`, so pass an explicit tuple.
    validation_data=(data_test_4d, target_test),
    callbacks=[lr_reduction, early_stopping],
)

In [None]:
# Training vs. validation loss per epoch, with the lowest validation loss
# marked and labelled.
fig, ax = plt.subplots(figsize=(12, 5))

ax.plot(history.history["loss"], label="training loss")
ax.plot(history.history["val_loss"], label="validation loss")

min_loss_index = np.argmin(history.history["val_loss"])
min_loss = np.min(history.history["val_loss"])
ax.plot(min_loss_index, min_loss, "*")
ax.text(min_loss_index, min_loss, f"{min_loss:.4f}")

ax.set_title("Losses")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.legend()
ax.grid()
plt.show()

In [None]:
# Training vs. validation accuracy per epoch, with the highest validation
# accuracy marked and labelled.
fig, ax = plt.subplots(figsize=(12, 5))

ax.plot(history.history["acc"], label="training accuracy")
ax.plot(history.history["val_acc"], label="validation accuracy")

max_acc_index = np.argmax(history.history["val_acc"])
max_acc = np.max(history.history["val_acc"])
ax.plot(max_acc_index, max_acc, "*")
ax.text(max_acc_index, max_acc, f"{max_acc:.4f}")

ax.set_title("Accuracies")
ax.set_xlabel("Epoch")
ax.set_ylabel("Accuracy")
ax.legend()
ax.grid()
plt.show()

In [None]:
# Predicted class = position of the largest network output per sample;
# true class = column label of the largest entry in each one-hot row.
y_pred = cnn_model.predict(data_test_4d).argmax(axis=1)
y_true = target_test.idxmax(axis=1)

cnf_matrix = sklearn.metrics.confusion_matrix(y_true, y_pred)
print(f"Confusion matrix:\n{cnf_matrix}")

In [None]:
# Load the unlabelled test set and apply the same preprocessing as for
# training: divide by 255 and reshape every row into INPUT_SHAPE.
test_data = pd.read_csv("../input/test.csv")
test_data = test_data / 255
test_data_4d = np.array(test_data).reshape((test_data.shape[0],) + INPUT_SHAPE)

# One row of network outputs per test sample; the predicted label is the
# column position of the largest output in each row.
predictions_one_hot = cnn_model.predict(test_data_4d)
predictions = pd.DataFrame(predictions_one_hot).idxmax(axis=1)

# ImageId is the row index of the test file shifted by one.
submission = pd.DataFrame(data={
    "ImageId": test_data.index + 1,
    "Label": predictions
})
# `index` is documented as a bool; pass False explicitly instead of relying on
# None being falsy, so the frame's index is not written as an extra column.
submission.to_csv("submission.csv", index=False)
submission.tail(n=12).T