In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
import tensorflow as tf
tf.get_logger().setLevel('ERROR')
from tensorflow import keras
from keras import models, layers, optimizers, callbacks, losses, metrics
import keras_tuner as kt

In [2]:
train_data = pd.read_csv("./datasets/train.csv")
test_data = pd.read_csv("./datasets/test.csv")

In [3]:
train_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42000 entries, 0 to 41999
Columns: 785 entries, label to pixel783
dtypes: int64(785)
memory usage: 251.5 MB


In [4]:
test_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28000 entries, 0 to 27999
Columns: 784 entries, pixel0 to pixel783
dtypes: int64(784)
memory usage: 167.5 MB


In [5]:
X_train = train_data.drop(columns = "label").values
y_train = train_data["label"].values

X_test = test_data.values

In [6]:
mean, std = np.mean(X_train), np.std(X_train)
X_train = (X_train - mean) / std
X_test = (X_test - mean) / std

In [7]:
X_train.shape, X_test.shape

((42000, 784), (28000, 784))

In [8]:
X_train = X_train.reshape(-1, 28, 28)
X_test = X_test.reshape(-1, 28, 28)

X_train.shape, X_test.shape

((42000, 28, 28), (28000, 28, 28))

In [9]:
X_train = X_train[..., tf.newaxis]
X_test = X_test[..., tf.newaxis]

X_train = tf.image.resize(X_train, [32, 32])
X_test = tf.image.resize(X_test, [32, 32])

X_train.shape, X_test.shape

(TensorShape([42000, 32, 32, 1]), TensorShape([28000, 32, 32, 1]))

In [34]:
AUTOTUNE = tf.data.AUTOTUNE

def preprocess(image, label):
    image = tf.image.convert_image_dtype(image, tf.float32)
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_brightness(image, max_delta=0.1)
    return image, label

ds = tf.data.Dataset.from_tensor_slices((X_train, y_train))
ds = ds.shuffle(buffer_size=10000, seed=42)

val_size = int(0.2 * len(X_train))
val_ds = ds.take(val_size)
train_ds = ds.skip(val_size)

train_ds = train_ds.map(preprocess, num_parallel_calls=AUTOTUNE).batch(32).prefetch(AUTOTUNE)
val_ds = val_ds.map(preprocess, num_parallel_calls=AUTOTUNE).batch(32).prefetch(AUTOTUNE)

In [39]:
model = models.Sequential([
    layers.Conv2D(filters = 64, kernel_size = (5, 5), activation = "relu", name = "block1_conv1"),
    layers.Conv2D(filters = 64, kernel_size = (5, 5), activation = "relu", name = "block1_conv2"),
    layers.BatchNormalization(name = "block1_normalization"),
    layers.MaxPool2D(pool_size = 2, name = "block1_maxpooling"),
    layers.Dropout(rate = 0.2, name = "block1_dropout"),
    layers.Conv2D(filters = 128, kernel_size = (3, 3), activation = "relu", name = "block2_conv1"),
    layers.Conv2D(filters = 128, kernel_size = (3, 3), activation = "relu", name = "block2_conv2"),
    layers.BatchNormalization(name = "block2_normalization"),
    layers.MaxPool2D(pool_size = 2, name = "block2_maxpooling"),
    layers.Dropout(rate = 0.2, name = "block2_dropout"),
    layers.Flatten(name = "Flatten"),
    layers.Dense(units = 256, activation = "relu", name = "fc1"),
    layers.Dense(units = 10, activation = "softmax", name = "prediction")
])

model.compile(
    optimizer = optimizers.Adam(learning_rate = 0.001, epsilon = 1e-08, decay = 1e-5),
    loss = losses.SparseCategoricalCrossentropy(),
    metrics = [
        metrics.SparseCategoricalAccuracy(name = "accuracy")
    ]
)

In [40]:
history = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 30,
    callbacks = [
        callbacks.EarlyStopping(monitor = "val_loss", patience = 5, restore_best_weights = True),
        callbacks.ReduceLROnPlateau(monitor = "val_loss", factor = 0.5, patience = 3, min_lr = 0.00001)
    ]
)

Epoch 1/30


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [41]:
best_result = pd.DataFrame(history.history).iloc[np.argmin(history.history["val_loss"])]
best_result

loss            0.003705
accuracy        0.998750
val_loss        0.003703
val_accuracy    0.998333
lr              0.000250
Name: 29, dtype: float64

In [30]:
prediction = model.predict(X_test)
prediction = np.argmax(prediction, axis=1)



In [31]:
prediction

array([2, 0, 9, ..., 3, 9, 2], dtype=int64)

In [32]:
pred_df = pd.DataFrame([], columns = ["ImageId", "Label"])
pred_df["ImageId"] = test_data.index + 1
pred_df["Label"] = prediction
pred_df

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,9
4,5,3
...,...,...
27995,27996,9
27996,27997,7
27997,27998,3
27998,27999,9


In [33]:
pred_df.to_csv("./results/augmentation_two_conv_blocks.csv", index = False)