# In [0]:
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
import tensorflow as tf
import os
import time

from keras.callbacks import ModelCheckpoint
from keras.models import Sequential
from keras.layers import Convolution2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam


def reformat(df):
    """Split a label+pixel table into a label vector and a stack of images.

    The first column of ``df`` is taken as the label; every remaining column
    is one value of a flattened, single-channel square image.

    Args:
        df: ``pandas.DataFrame`` with one sample per row.

    Returns:
        A ``(labels, images)`` tuple. ``labels`` is the first column as a 1-D
        array. ``images`` is a ``float64`` array of shape
        ``(n_rows, side, side, 1)`` where ``side = int(sqrt(n_pixel_columns))``.

    Raises:
        ValueError: if a non-empty table has a number of pixel columns that
            is not a perfect square (the reshape cannot succeed).
    """
    labels = df.iloc[:, 0].to_numpy().ravel()
    pixels = df.iloc[:, 1:].to_numpy().astype('float64')

    n_rows, n_pixels = pixels.shape
    side = int(math.sqrt(n_pixels))

    # Reshape the whole table at once rather than looping over rows in
    # Python; this also keeps the 4-D layout when the table has no rows.
    images = pixels.reshape(n_rows, side, side, 1)
    return labels, images


def create_augmented_data(data, labels):
    """Double a dataset by appending a flipped copy of every sample.

    Each sample ``data[i]`` is reversed along its own axis 1 (for samples
    laid out as ``(rows, cols, channels)`` that is a left-right mirror). The
    flipped samples are appended after the originals, and ``labels[i]`` is
    repeated for the flipped copy of sample ``i``.

    Args:
        data: array of samples, indexed along axis 0.
        labels: labels aligned with ``data``.

    Returns:
        ``(labels, data)`` -- note the label-first order -- each holding the
        original entries followed by the entries for the flipped samples.
    """
    sample_count = len(data)
    flipped = [np.flip(sample, 1) for sample in data]
    flipped_labels = [labels[idx] for idx in range(sample_count)]

    augmented = np.concatenate((data, flipped), axis=0)
    augmented_labels = np.concatenate((labels, flipped_labels), axis=0)
    return augmented_labels, augmented

def normalize(data):
    """Return ``data`` divided by 255 as a new floating-point array.

    Args:
        data: array-like of values to scale; it is not modified.

    Returns:
        A new array with the same shape as ``data`` holding ``data / 255``.
        Floating-point input keeps its dtype; any other dtype is promoted to
        ``float64``.
    """
    values = np.asarray(data)
    # Divide in floating point: storing the quotient back into an integer
    # array would truncate every value to 0 or 1.
    if not np.issubdtype(values.dtype, np.floating):
        values = values.astype('float64')
    # Typed divisor keeps e.g. float32 input as float32.
    return values / values.dtype.type(255)

def standardize(data):
    """Standardize every sample to zero mean and unit standard deviation.

    Mean and standard deviation are computed separately for each sample,
    i.e. over all axes except the first.

    Args:
        data: array-like whose first axis indexes samples; it is not modified.

    Returns:
        A new floating-point array with the same shape as ``data``.
        Floating-point input keeps its dtype; any other dtype is promoted to
        ``float64``. A sample whose values are all equal maps to all zeros.
    """
    values = np.asarray(data)
    # Work in floating point so the result is not truncated for integer input.
    if not np.issubdtype(values.dtype, np.floating):
        values = values.astype('float64')

    sample_axes = tuple(range(1, values.ndim))
    mean = values.mean(axis=sample_axes, keepdims=True)
    std = values.std(axis=sample_axes, keepdims=True)

    # A constant sample has std == 0 and would turn into NaN. Its centered
    # values are already zero, so dividing by 1 gives the intended zeros.
    safe_std = np.where(std == 0, 1, std)
    return (values - mean) / safe_std

def one_hot_encode(labels):
    """One-hot encode integer labels.

    The vector width is ``labels.max() - labels.min() + 1`` and the smallest
    label maps to position 0, so the encoding depends on the range of values
    present in ``labels``.

    Args:
        labels: array of integer labels (must provide ``max()`` / ``min()``).

    Returns:
        2-D array with one row per label; each row is zero except for a 1 at
        index ``label - labels.min()``.
    """
    highest = labels.max()
    lowest = labels.min()
    width = highest - lowest + 1

    def encode_one(label):
        row = np.zeros(width)
        row[label - lowest] = 1
        return row

    return np.array([encode_one(label) for label in labels])


def one_hot_to_index(labels):
    """Return, for every row of ``labels``, the index of its largest entry.

    For one-hot rows this is the position of the 1.
    """
    row_indices = np.argmax(labels, axis=1)
    return row_indices


def calc_acc(classes, model, X, y, title):
    """Evaluate ``model`` on ``(X, y)`` and report overall and per-class accuracy.

    When the module-level flag ``print_accuracy_to_file`` is true the report
    is written to ``/kaggle/working/result/<title lower-cased>_result.txt``
    (the directory is created if needed); otherwise it is printed.

    Args:
        classes: sequence of class names; position ``i`` names label ``i``.
        model: object whose ``predict(X, verbose=0)`` returns one score
            vector per sample.
        X: samples passed to ``model.predict``.
        y: per-sample target vectors; the argmax of each is the true label.
        title: name used in the report text and in the output file name.

    An accuracy of ``-1`` in the report means there were no samples to
    compute it from (for a single class, or for the whole set).
    """
    predictions = model.predict(X, verbose=0)

    class_correct = [0] * len(classes)
    class_incorrect = [0] * len(classes)

    for expected, scores in zip(y, predictions):
        actual_label = int(np.argmax(expected))
        pred_label = int(np.argmax(scores))

        if pred_label == actual_label:
            class_correct[actual_label] += 1
        else:
            class_incorrect[actual_label] += 1

    correct = sum(class_correct)
    total = correct + sum(class_incorrect)
    # Same "-1 means no samples" convention as the per-class figures below,
    # instead of dividing by zero on an empty evaluation set.
    acc = float(correct) / float(total) if total > 0 else -1

    report_parts = [
        "Current Network " + title + " Accuracy: %.3f \n\n" % (acc),
        "Current Network " + title + " Class Accuracies:\n",
    ]
    for name, n_right, n_wrong in zip(classes, class_correct, class_incorrect):
        seen = float(n_right + n_wrong)
        class_acc = float(n_right) / seen if seen > 0 else -1
        report_parts.append("\t%s: %.3f\n" % (name, class_acc))
    result_string = "".join(report_parts)

    if print_accuracy_to_file:
        result_dir = "/kaggle/working/result"
        # Creates missing parents too and does not fail if it already exists.
        os.makedirs(result_dir, exist_ok=True)
        # Context manager closes the file even if the write raises.
        with open(result_dir + "/" + title.lower() + "_result.txt", "w") as f:
            f.write(result_string)
        print("Printing " + title + " accuracy is done!")
    else:
        print(result_string)


def get_model():
    """Build the (uncompiled) convolutional classifier.

    Layer sequence:
      * two identical convolutional stages, each:
        Conv(64, 3x3, 'same') -> BatchNorm -> Conv(64, 3x3) -> BatchNorm
        -> MaxPool(2) -> Dropout(0.25)
      * Flatten
      * Dense(512) -> BatchNorm -> Dropout(0.2)
      * Dense(256) -> BatchNorm -> Dropout(0.1)
      * Dense(128) -> BatchNorm
      * Dense(10, softmax)

    All convolutional and hidden dense layers use ReLU. The first layer
    declares an input shape of ``(28, 28, 1)``.

    Returns:
        The assembled ``Sequential`` model.
    """
    # Arguments shared by every convolution; only `padding` varies.
    conv_kwargs = dict(kernel_size=3, input_shape=(28, 28, 1), activation='relu')

    # (units, dropout rate or None) for each hidden dense stage.
    dense_stages = ((512, 0.2), (256, 0.1), (128, None))

    network = Sequential()

    for _ in range(2):
        network.add(Convolution2D(64, padding='same', **conv_kwargs))
        network.add(BatchNormalization())
        network.add(Convolution2D(64, **conv_kwargs))
        network.add(BatchNormalization())
        network.add(MaxPooling2D(2))
        network.add(Dropout(0.25))

    network.add(Flatten())

    for units, drop_rate in dense_stages:
        network.add(Dense(units=units, activation='relu'))
        network.add(BatchNormalization())
        if drop_rate is not None:
            network.add(Dropout(drop_rate))

    network.add(Dense(units=10, activation='softmax'))

    return network

# Settings

data_augmentation = True

set_already_trained_model = False
plot_loss = False
save_loss_plot = True
print_accuracy_to_file = True  # If false, accuracy will be printed on console

# Fraction of the original training rows held out for validation.
validation_fraction = 1 / 6

classes = ["T-shirt/top", "Trouser", "Pullover", "Dress", "Coat", "Sandal", "Shirt", "Sneaker", "Bag",
           "Ankle boot"]

# Data Preprocess

raw_train = pd.read_csv("/kaggle/input/fashionmnist/fashion-mnist_train.csv")
raw_test = pd.read_csv("/kaggle/input/fashionmnist/fashion-mnist_test.csv")
print("Data loaded")
train_labels, train = reformat(raw_train)
test_labels, test = reformat(raw_test)
print("Data reformatted")

# Validation hold-out
#
# The validation rows are split off BEFORE augmentation. Keras'
# `validation_split` takes the last rows of the array (before shuffling), and
# augmentation appends the mirrored images at the end, so using it here would
# validate only on mirrored copies of images that are still in the training
# set.
split_at = len(train) - int(round(len(train) * validation_fraction))
val, val_labels = train[split_at:], train_labels[split_at:]
train, train_labels = train[:split_at], train_labels[:split_at]

# Data Augmentation

if data_augmentation:
    train_labels, train = create_augmented_data(train, train_labels)
    # Only report augmentation when it actually happened.
    print("Data augmented")

train = normalize(train)
val = normalize(val)
test = normalize(test)

print("Data normalized")

train_labels = one_hot_encode(train_labels)
val_labels = one_hot_encode(val_labels)
test_labels = one_hot_encode(test_labels)

if not set_already_trained_model:
    print("Training started")
    start_time = time.time()

    # Model creation
    classifier = get_model()

    classifier.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # Setting callbacks: keep the weights of the epoch with the lowest
    # validation loss on disk.
    checkpoint = ModelCheckpoint('/kaggle/working/weights.hdf5', monitor='val_loss', save_best_only=True)

    history = classifier.fit(train,
                             train_labels,
                             epochs=100,
                             shuffle=True,
                             validation_data=(val, val_labels),
                             use_multiprocessing=True,
                             callbacks=[checkpoint])

    elapsed_time = time.time() - start_time
    print("Train finished in " + str(elapsed_time) + " seconds")

    # NOTE(review): the model evaluated below carries the weights of the LAST
    # epoch, not the best checkpoint saved above -- confirm this is intended.

    # Plotting

    if plot_loss or save_loss_plot:
        plt.plot(history.history['loss'], label="Train")
        plt.plot(history.history['val_loss'], label="Validation")
        plt.legend()
        # The two flags are independent. Save first, because the figure may
        # no longer be available once the interactive window is closed.
        if save_loss_plot:
            plt.savefig('plot.png')
        if plot_loss:
            plt.show()

# Loading

else: #IF set_already_trained_model is TRUE
    classifier = get_model()

    classifier.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    classifier.load_weights('/kaggle/working/weights.hdf5')

# Reporting

calc_acc(classes, classifier, test, test_labels, "Test")