In [1]:
import os

import numpy as np
import scipy as sc
import pandas as pd

import torch
#from tensorflow import keras

import matplotlib.pyplot as plt

# Extract and list files in dataset
The dataset is stored in an archive so that it can be uploaded to GitHub.
The archive is extracted into the data/fashionMNIST directory, which is created if it does not exist.

The notebook also creates the directories in which figures and models are saved.

In [2]:
# Create the extraction directory and unpack the dataset archive into it.
# NOTE(review): the -n / -qq flags presumably skip files that were already
# extracted and silence unzip's output, so the cell can be re-run -- confirm
# against unzip(1) on the target platform.
!mkdir -p ../data/fashionMNIST
!unzip -nqq ../data/fashionMNIST.zip -d ../data/fashionMNIST/

# Output directories for saved figures and models.
!mkdir -p ../figures
!mkdir -p ../models

In [3]:
# Print the path of every file found under the extracted dataset directory.
print("Files in dataset:\n")

# NOTE(review): this loop runs at cell level, so `directory`, `_`, `filenames`
# and `filename` stay defined as notebook globals after the cell finishes.
# Any later cell that reads one of these names without defining it will
# silently pick up the last value assigned here.
for directory, _, filenames in os.walk("../data/fashionMNIST/"):
    for filename in filenames:
        print(os.path.join(directory, filename))
        

Files in dataset:

../data/fashionMNIST/fashion-mnist_test.csv
../data/fashionMNIST/fashion-mnist_train.csv
../data/fashionMNIST/t10k-images-idx3-ubyte
../data/fashionMNIST/t10k-labels-idx1-ubyte
../data/fashionMNIST/train-images-idx3-ubyte
../data/fashionMNIST/train-labels-idx1-ubyte


# Read dataset

Read the dataset from the data/fashionMNIST directory

In [4]:
# Useful constants
# Two-dimensional shape of one image: 28 * 28 = 784 values per sample.
# NOTE(review): the plotting helper further down carries its own
# `shape=(28, 28)` default rather than reading this constant -- keep the two
# in sync if the image size ever changes.
img_shape = (28,28)

In [5]:
# Reduce the dataset size: only the first `train_size` / `test_size` data rows
# of each CSV file are loaded.
train_size = 500
test_size = 100


def _load_split(csv_path, max_rows):
    """Load the first `max_rows` samples of a dataset CSV file.

    The first line of the file is skipped as a header. Column 0 of every
    remaining row is stored as the label and all other columns as the
    flattened image.

    Parameters
    ----------
    csv_path : str
        Path of the comma-separated file to read.
    max_rows : int
        Maximum number of data rows to read.

    Returns
    -------
    dict
        ``{'labels': 1-D array, 'imgs': 2-D array}`` with one entry / one row
        per sample.
    """
    raw = np.genfromtxt(csv_path, delimiter=',', skip_header=1, max_rows=max_rows)
    # genfromtxt squeezes a single data row into a 1-D array; restore the
    # (n_samples, n_columns) layout so the column slicing below also works
    # when only one row is requested.
    raw = np.atleast_2d(raw)
    return {'labels': raw[:, 0],
            'imgs'  : raw[:, 1:]}


def _print_split_shape(split):
    """Print the number of labels and the dimensions of the image matrix."""
    print(f"label vector size: {split['labels'].shape[0]}")
    print(f"imgs matrix size:  {split['imgs'].shape[0]}x{split['imgs'].shape[1]}")


# Reading dataset
# flush=True makes the progress message appear before the (slow) read starts
# even when stdout is buffered.
print("Reading train dataset...", end='', flush=True)
train_set = _load_split("../data/fashionMNIST/fashion-mnist_train.csv", train_size)
print("Done")
_print_split_shape(train_set)

print("\n\nReading test dataset...", end='', flush=True)
test_set = _load_split("../data/fashionMNIST/fashion-mnist_test.csv", test_size)
print("Done.")
_print_split_shape(test_set)

Reading train dataset...Done
label vector size: 500
imgs matrix size:  500x784


Reading test dataset...Done.
label vector size: 100
imgs matrix size:  100x784


# Helper functions

Here are the implementations of some useful helper functions.

In [6]:
def showimg(img, ax=None, shape=(28, 28), cmap='plasma', title="", fontsize=15, axis=False, savefig=True, savedir=None, filename=None):
    """Draw a single image on a matplotlib axis and optionally save the figure.

    Parameters
    ----------
    img : array
        Image data; it is reshaped to `shape` before being drawn.
    ax : matplotlib axis, optional
        Axis to draw on. A new figure and axis are created when omitted.
    shape : tuple
        Two-dimensional shape the image is reshaped to.
    cmap : str
        Colormap passed to ``imshow``.
    title : str
        Axis title; also used as the file name when `filename` is not given.
    fontsize : int
        Font size of the title.
    axis : bool
        When False, the axis decorations are hidden.
    savefig : bool
        Allow saving. Nothing is written unless `savedir` is also given.
    savedir : str, optional
        Directory the figure is saved to (created if missing).
    filename : str, optional
        File name without extension. Defaults to `title`, or ``"image"``
        when the title is empty.

    Returns
    -------
    None
    """
    if ax is None:
        _, ax = plt.subplots()
    if not axis:
        ax.axis('off')

    ax.imshow(img.reshape(shape), cmap=cmap)
    ax.set_title(title, fontsize=fontsize)

    # Saving is only possible when a target directory is known. The file name
    # comes from the arguments, never from a global left behind by another
    # cell.
    if savefig and savedir is not None:
        stem = filename or title or "image"
        os.makedirs(savedir, exist_ok=True)
        # Save the figure that owns `ax`, which is not necessarily pyplot's
        # "current" figure when the caller supplied the axis.
        ax.figure.savefig(os.path.join(savedir, f"{stem}.png"))

In [7]:
def showimgpanel(imgs, n_imgs, panelshape, figsize=(15,15),
                 labels=None, labels_prefix="Label : ",
                 title="", fontsize_title=30, fontsize_label=15,
                 savefig=True, savedir=None, filename=None):
    """Draw a grid of images in one figure and optionally save it.

    Parameters
    ----------
    imgs : array
        Images to draw, one per entry along the first dimension.
    n_imgs : int
        Maximum number of images to draw. The actual count is also limited
        by the number of images available and by the number of grid cells.
    panelshape : tuple
        ``(rows, columns)`` of the subplot grid.
    figsize : tuple
        Figure size passed to ``plt.figure``.
    labels : sequence, optional
        Per-image labels; each one is converted with ``int`` and shown after
        `labels_prefix`. No subplot titles are shown when omitted.
    labels_prefix : str
        Text placed before each label.
    title : str
        Figure title; also used as the file name when `filename` is not given.
    fontsize_title, fontsize_label : int
        Font sizes of the figure title and of the subplot titles.
    savefig : bool
        Allow saving. Nothing is written unless `savedir` is also given.
    savedir : str, optional
        Directory the figure is saved to (created if missing).
    filename : str, optional
        File name without extension. Defaults to `title`, or ``"panel"``
        when the title is empty.

    Returns
    -------
    None
    """
    n_rows, n_cols = panelshape
    n_imgs = min(imgs.shape[0], n_imgs, n_rows * n_cols)

    fig = plt.figure(figsize=figsize)
    fig.suptitle(title, fontsize=fontsize_title)
    fig.subplots_adjust(wspace=0.1, hspace=0.2)

    for i in range(n_imgs):
        ax = fig.add_subplot(n_rows, n_cols, i + 1)
        label = '' if labels is None else labels_prefix + str(int(labels[i]))
        # Individual tiles are never saved on their own; only the whole panel is.
        showimg(imgs[i], ax, title=label, fontsize=fontsize_label, savefig=False)

    # Saving is only possible when a target directory is known. The file name
    # comes from the arguments, never from a global left behind by another
    # cell.
    if savefig and savedir is not None:
        stem = filename or title or "panel"
        os.makedirs(savedir, exist_ok=True)
        fig.savefig(os.path.join(savedir, f"{stem}.png"))
    

# Model definition
Here we will define the architecture of our models