# Welcome to the Fashion-MNIST Challenge!

Website reference: https://github.com/zalandoresearch/fashion-mnist

In [None]:
##################################################
# Imports
##################################################

import numpy as np
import cv2
import os
import pandas as pd
import matplotlib.pyplot as plt

##################################################
# Custom imports
##################################################

import tensorflow as tf
# from keras.callbacks import ReduceLROnPlateau
from sklearn import preprocessing
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras.preprocessing.image import ImageDataGenerator


##################################################
# Params
##################################################

# Directory holding every data file this notebook reads: the train/validation/
# test .npy arrays, the train/validation label .csv files and the sample
# submission. NOTE(review): looks like a Kaggle input mount -- adjust the path
# when running elsewhere.
DATA_BASE_FOLDER = '/kaggle/input/image-classification-fashion-mnist'

# Dataset

The dataset contains 50k train + 10k validation images of 10 different categories ('T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot').

Each image is a 28x28 grayscale picture; for simplicity, it is stored here flattened into a 784-dimensional vector.

In [None]:
##################################################
# Load dataset
##################################################

# Image arrays are stored as .npy files under DATA_BASE_FOLDER.
x_train, x_valid, x_test = (
    np.load(os.path.join(DATA_BASE_FOLDER, file_name))
    for file_name in ('train.npy', 'validation.npy', 'test.npy')
)
# Labels are read from the 'class' column of the matching .csv files.
y_train, y_valid = (
    pd.read_csv(os.path.join(DATA_BASE_FOLDER, file_name))['class'].values
    for file_name in ('train.csv', 'validation.csv')
)
# Human-readable class names, indexed by the integer label.
y_labels = [
    'T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
    'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot',
]

# Show the first 20 training images in a single row, titled with their class
plt.figure(figsize=(25, 5))
for position in range(1, 21):
    thumbnail = x_train[position - 1].reshape(28, 28)
    plt.subplot(1, 20, position)
    plt.title(y_labels[y_train[position - 1]])
    plt.imshow(thumbnail, cmap='gray')
    plt.axis('off')
plt.show()

In [None]:
# Report the array shapes of the three data splits as loaded
print("Data shape")
for caption, split in (("x_train: ", x_train), ("x_valid: ", x_valid), ("x_test: ", x_test)):
    print(caption, split.shape)

In [None]:
# Reshape the data for the Keras neural network

##################################################
# The input shape is (batch_size, height, width, channels).
# An RGB image would have 3 channels; a grayscale image has 1.
##################################################

# Keep the number of samples and turn each sample into a 28x28x1 image.
x_train = x_train.reshape((len(x_train), 28, 28, 1))
x_valid = x_valid.reshape((len(x_valid), 28, 28, 1))
x_test = x_test.reshape((len(x_test), 28, 28, 1))


In [None]:
# Shapes of the data splits after reshaping into image tensors
for caption, split in zip(("x_train: ", "x_valid: ", "x_test: "), (x_train, x_valid, x_test)):
    print(caption, split.shape)

In [None]:
##################################################
# Process the data here, if needed
##################################################

# Any manipulation of the dataset needed to feed the data to the algorithm in
# the correct "format".

# Data augmentation

# Training generator: multiplies pixel values by 1/255, applies random shear
# and horizontal flips, and standardizes with dataset-wide mean/std.
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.2,
    horizontal_flip=True,
    featurewise_center=True,
    featurewise_std_normalization=True,
)
# Validation/test generator: no random augmentation, but the same rescaling
# and standardization as the training data, so the model is evaluated on
# inputs with the scale it was trained on.
test_datagen = ImageDataGenerator(
    rescale=1. / 255,
    featurewise_center=True,
    featurewise_std_normalization=True,
)

# The featurewise_* options only take effect after fit() has computed the
# statistics. Both generators are fitted on the training images so that they
# share the same mean/std (validation/test data is not used for statistics).
train_datagen.fit(x_train)
test_datagen.fit(x_train)

# Retrieve one batch of 9 augmented images and show it as a 3x3 grid
preview_images, _ = next(train_datagen.flow(x_train, y_train, batch_size=9))
for i, preview in enumerate(preview_images):
    plt.subplot(3, 3, i + 1)
    plt.imshow(preview.reshape(28, 28), cmap=plt.get_cmap('gray'))
# show the plot
plt.show()


# Model

Here you have to implement a model (or several models, to find the most accurate one) for classification.

You can use the [`sklearn`](https://scikit-learn.org/stable/) (or optionally other more advanced frameworks such as [`pytorch`](https://pytorch.org/) or [`tensorflow`](https://www.tensorflow.org/)) package that contains a pool of models already implemented that perform classification. (SVMs, NNs, LR, kNN, ...)

In [None]:
##################################################
# Implement your model here
##################################################

# Small CNN: one convolutional stage followed by a dense classifier head.
model = tf.keras.models.Sequential()
# Convolutional feature extractor on 28x28 single-channel images
model.add(tf.keras.layers.Conv2D(64, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
# Classifier head: flatten, hidden layer with dropout, 10-way softmax output
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(256, activation='relu'))
model.add(tf.keras.layers.Dropout(0.35))
model.add(tf.keras.layers.Dense(10, activation='softmax'))

# The sparse loss takes integer class labels directly (no one-hot encoding).
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="adam",
    metrics=['accuracy'],
)



In [None]:
# Batches of image data fed to the network.
# train_generator yields batches of 32 images drawn from x_train through
# train_datagen; valid_generator does the same for x_valid through test_datagen.
train_generator = train_datagen.flow(x_train, y_train, batch_size=32)
valid_generator = test_datagen.flow(x_valid, y_valid, batch_size=32)

# Model.fit accepts generators directly; fit_generator is deprecated in
# TensorFlow 2.x. The returned History object is kept so that the training
# curves can be plotted afterwards (history.history holds the per-epoch metrics).
history = model.fit(
    train_generator,
    epochs=20,
    validation_data=valid_generator,
)

#model.save_weights('first_try.h5')

SOURCES:
    https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html
    https://stackoverflow.com/questions/60157742/convolutional-neural-network-cnn-input-shape
    https://keras.io/api/preprocessing/image/
    https://machinelearningmastery.com/how-to-configure-image-data-augmentation-when-training-deep-learning-neural-networks/

NEXT:
- Try other preprocessing techniques
- Try other models
- Try LSTM and RNN models
- Try combining an RNN with a CNN


# Evaluation

In [None]:
##################################################
# Evaluate the model here
##################################################

# Use this function to evaluate your model
def accuracy(y_pred, y_true):
    '''
    Return the fraction of predictions that equal the true labels.

    input y_pred: array-like of shape (N,) with the predicted labels
    input y_true: array-like of shape (N,) with the ground-truth labels
    output: float in [0, 1], the mean of the element-wise matches
    raises ValueError: if y_pred and y_true do not have the same shape
    '''
    # Accept lists/tuples as well as ndarrays.
    y_pred = np.asarray(y_pred)
    y_true = np.asarray(y_true)
    # Check shapes explicitly: NumPy would otherwise broadcast e.g. (N,)
    # against (N, 1) into an (N, N) comparison and return a meaningless score.
    if y_pred.shape != y_true.shape:
        raise ValueError(
            f'y_pred and y_true must have the same shape, '
            f'got {y_pred.shape} and {y_true.shape}'
        )
    return (1.0 * (y_pred == y_true)).mean()

# Report the accuracy in the train and validation sets.









# Send the submission for the challenge

In [None]:
##################################################
# Save your test prediction in y_test_pred
##################################################

# Feed the test images through the same preprocessing as the validation data.
# shuffle=False keeps the predictions in the row order of x_test.
test_generator = test_datagen.flow(x_test, batch_size=32, shuffle=False)
# The model outputs 10 softmax scores per image; the index of the highest
# score is the predicted class label.
y_test_pred = model.predict(test_generator).argmax(axis=1)

# Create submission
submission = pd.read_csv(os.path.join(DATA_BASE_FOLDER, 'sample_submission.csv'))
submission['class'] = y_test_pred
submission.to_csv('my_submission.csv', index=False)