In [None]:
### import packages
import os

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense, MaxPooling2D, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

In [None]:
# set path variables (relative paths, joined with file names via os.path.join below)
path_data = 'data'  # directory the train/test .npy images and the label .csv are loaded from
path_submission = 'submissions'  # directory used by the submission export code

In [None]:
### helper functions
def process_images(X, augment=False):
    """Scale pixel values and append a trailing channel axis.

    Parameters
    ----------
    X : np.ndarray
        Image array. Every value is divided by 255 and a length-1 axis is
        appended to the end of its shape.
    augment : bool, optional
        When true, a copy of the batch reversed along axis 2 is appended
        after the originals, which doubles the length of axis 0.

    Returns
    -------
    np.ndarray
        The scaled, reshaped (and optionally doubled) array. The input
        array itself is not modified.
    """
    scaled = X / 255
    images = scaled.reshape(scaled.shape + (1,))

    if not augment:
        return images

    # np.flip(..., 2) reverses the third axis of the 4-D batch; the mirrored
    # copies are placed after the originals, so labels must be duplicated
    # in the same order.
    mirrored = np.flip(images, 2)
    return np.concatenate((images, mirrored), 0)

def process_labels(y, augment=False):
    """Return the label column (column index 1) of a 2-D label table.

    Parameters
    ----------
    y : np.ndarray
        2-D array whose column 0 is dropped (the 'ID' column) and whose
        column 1 is returned.
    augment : bool, optional
        When true, the label column is repeated once (labels followed by
        the same labels again), matching the ordering produced by
        ``process_images(..., augment=True)``.

    Returns
    -------
    np.ndarray
        1-D array of labels, twice as long when ``augment`` is true.
    """
    labels = y[:, 1]
    return np.concatenate((labels, labels), 0) if augment else labels

def predict(X, model):
    """Return, for each row of ``model.predict(X)``, the index of its largest value.

    ``model`` only needs to expose a ``predict(X)`` method returning a 2-D
    array of per-class scores; the argmax is taken along axis 1.
    """
    scores = model.predict(X)
    return np.argmax(scores, axis=1)

def categorical_to_label(y):
    """Collapse a 2-D array of per-class values to the argmax index of each row."""
    labels = np.argmax(y, axis=1)
    return labels

def get_acc(X, y, model):
    """Return the fraction of samples whose predicted class index equals ``y``.

    ``y`` is compared element-wise against the output of ``predict``, so it
    must already hold class indices; a one-hot encoded ``y`` would first
    have to go through ``categorical_to_label``.
    """
    hits = predict(X, model) == y
    return np.mean(hits)

In [None]:
def get_model_1(input_shape=None):
    """Build and compile a three-convolution CNN with a 10-way softmax output.

    Model architecture based on Kaggle kernel
    https://www.kaggle.com/bugraokcu/cnn-with-keras

    Parameters
    ----------
    input_shape : tuple, optional
        Shape of a single input sample, handed to the first Conv2D layer.
        When omitted, the module-level ``im_shape`` is used, so existing
        zero-argument calls keep working; passing it explicitly removes
        the dependency on that global having been set by an earlier cell.

    Returns
    -------
    Sequential
        Model compiled with the Adam optimizer, sparse categorical
        cross-entropy loss (integer class labels) and an accuracy metric.
    """
    if input_shape is None:
        # Backward-compatible fallback: the training cell defines `im_shape`.
        input_shape = im_shape

    model = Sequential([
        Conv2D(filters=32, kernel_size=3, activation='relu', kernel_initializer='he_normal', input_shape=input_shape),
        MaxPooling2D(2,2),
        Dropout(0.25),
        Conv2D(filters=64, kernel_size=3, activation='relu'),
        MaxPooling2D(2,2),
        Dropout(0.25),
        Conv2D(filters=128, kernel_size=3, activation='relu'),
        Dropout(0.4),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.3),
        Dense(10, activation='softmax')
    ])

    model.compile(optimizer=Adam(),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    return model

In [None]:
def get_model_2(input_shape=None):
    """Build and compile a batch-normalised CNN with a 10-way softmax output.

    Model architecture based on Kaggle kernel
    https://www.kaggle.com/fuzzywizard/fashion-mnist-cnn-keras-accuracy-93

    Parameters
    ----------
    input_shape : tuple, optional
        Shape of a single input sample, handed to the first Conv2D layer.
        When omitted, the module-level ``im_shape`` is used, so existing
        zero-argument calls keep working; passing it explicitly removes
        the dependency on that global having been set by an earlier cell.

    Returns
    -------
    Sequential
        Model compiled with the Adam optimizer, sparse categorical
        cross-entropy loss (integer class labels) and an accuracy metric.
    """
    if input_shape is None:
        # Backward-compatible fallback: the training cell defines `im_shape`.
        input_shape = im_shape

    model = Sequential([
        Conv2D(filters=32, kernel_size=3, activation='relu', kernel_initializer='he_normal', input_shape=input_shape),
        BatchNormalization(),
        Conv2D(filters=32, kernel_size=3, activation='relu'),
        BatchNormalization(),
        Dropout(0.25),
        Conv2D(filters=64, kernel_size=3, activation='relu'),
        MaxPooling2D(2,2),
        Dropout(0.25),
        Conv2D(filters=128, kernel_size=3, activation='relu'),
        BatchNormalization(),
        Dropout(0.25),
        Flatten(),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(10, activation='softmax')
    ])

    model.compile(optimizer=Adam(),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    return model

In [None]:
def get_model_3(input_shape=None):
    """Build and compile a variant of model 2 with extra max-pooling stages.

    Compared with ``get_model_2``, a MaxPooling2D layer follows the second
    and the last convolution stage as well, so the feature map entering
    ``Flatten`` is smaller.

    Parameters
    ----------
    input_shape : tuple, optional
        Shape of a single input sample, handed to the first Conv2D layer.
        When omitted, the module-level ``im_shape`` is used, so existing
        zero-argument calls keep working; passing it explicitly removes
        the dependency on that global having been set by an earlier cell.

    Returns
    -------
    Sequential
        Model compiled with the Adam optimizer, sparse categorical
        cross-entropy loss (integer class labels) and an accuracy metric.
    """
    if input_shape is None:
        # Backward-compatible fallback: the training cell defines `im_shape`.
        input_shape = im_shape

    model = Sequential([
        Conv2D(filters=32, kernel_size=3, activation='relu', kernel_initializer='he_normal', input_shape=input_shape),
        BatchNormalization(),
        Conv2D(filters=32, kernel_size=3, activation='relu'),
        BatchNormalization(),
        MaxPooling2D(2,2),
        Dropout(0.25),
        Conv2D(filters=64, kernel_size=3, activation='relu'),
        MaxPooling2D(2,2),
        Dropout(0.25),
        Conv2D(filters=128, kernel_size=3, activation='relu'),
        BatchNormalization(),
        MaxPooling2D(2,2),
        Dropout(0.25),
        Flatten(),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(10, activation='softmax')
    ])

    model.compile(optimizer=Adam(),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    return model

In [None]:
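### code for k-fold cross-validation (not used; before re-enabling, replace get_model() with one of get_model_1/2/3 -- get_model() is not defined in this notebook -- and load the files via path_data)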

# epochs = 25
# batch_size = 256
# n_splits=10

# # load images and labels
# X = np.load('train_images.npy')
# y = np.loadtxt('train_labels.csv', delimiter=',', skiprows=1)

# # process data
# X = process_images(X)
# y = process_labels(y)

# # get image shape
# im_shape = X.shape[1:]

# kfold = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=3791)

# acc = []
# val_acc = []

# for train, val in kfold.split(X, y):

#   model = get_model()
#   model.fit(X[train], y[train], epochs=epochs, batch_size=batch_size, validation_data=(X[val], y[val]))

#   acc.append(get_acc(X[train], y[train], model))
#   val_acc.append(get_acc(X[val], y[val], model))

# print('Mean training accuracy:', np.mean(acc))
# print('Mean validation accuracy:', np.mean(val_acc))

In [None]:
### train model

# model builders selectable by id
models = {1: get_model_1, 2: get_model_2, 3: get_model_3}

# hyperparameters for final model
model_id = 2
augment = True
epochs = 25
batch_size = 32

# load images and labels
X_raw = np.load(os.path.join(path_data, 'train_images.npy'))
y_raw = np.loadtxt(os.path.join(path_data, 'train_labels.csv'), delimiter=',', skiprows=1)

# normalize / reshape only -- augmentation is deliberately deferred until
# after the train-validation split (see below)
X_all = process_images(X_raw)
y_all = process_labels(y_raw)

# get image shape (read by the get_model_* builders)
im_shape = X_all.shape[1:]

# train-validation split
X_train, X_val, y_train, y_val = train_test_split(X_all, y_all, test_size=0.1, random_state=3791)

if augment:
    # Augment the training portion only. Mirroring before the split would put
    # the flipped twin of a training image into the validation set (and vice
    # versa), leaking training data and inflating the validation accuracy.
    # Same transformation as process_images(..., augment=True): originals
    # followed by copies reversed along axis 2, labels duplicated to match.
    X_train = np.concatenate((X_train, np.flip(X_train, 2)), 0)
    y_train = np.concatenate((y_train, y_train), 0)

model = models[model_id]()
history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_val, y_val))
print('Training accuracy:', get_acc(X_train, y_train, model))
print('Validation accuracy:', get_acc(X_val, y_val, model))

In [None]:
### create submission.csv file

# load the test images, dropping every length-1 axis so that process_images
# can append its own trailing channel axis
test_images_file = os.path.join(path_data, 'test_images.npy')
X_test = np.load(test_images_file).squeeze()

# predict class indices with the model trained in the previous cell
X_test_processed = process_images(X_test)
y_test = predict(X_test_processed, model)

# save predictions (export currently disabled; uncomment to write the file)
# df_test = pd.read_csv(os.path.join(path_submission, 'sample_submission.csv'))
# df_test['label'] = y_test
# df_test.to_csv(os.path.join(path_submission, 'submission.csv'), index=False)