In [1]:
# Get the Fashion-MNIST data (https://github.com/zalandoresearch/fashion-mnist). 
# Bonus if you download the raw files and build the data loaders yourself. 
# Build a PyTorch pipeline to learn the classification
# You will get a bonus if the this pipeline is well-coded : following PyTorch style, using multiprocessing and GPU (request one on Google Collab)
# Present the results of different architectures based on different metrics

In [2]:
# Imports libraries

import torch
import numpy as np
import pandas as pd
import seaborn as sns
sns.set()
import matplotlib.pyplot as plt
import gzip

from torch.nn import Module, Parameter
from torch.utils.data import DataLoader, Dataset
import time

In [3]:
class MyOwnMLP(Module): # inherit from the base class
    def __init__(self): # define the constructor
        super(MyOwnMLP, self).__init__() # call the constructor of the base class
        self.w1 = Parameter(torch.normal(mean=0., std=0.1, size=(784, 100))) # define the parameters
        self.b1 = Parameter(torch.normal(mean=0., std=0.1, size=(1, 100))) 
        self.w2 = Parameter(torch.normal(mean=0., std=0.1, size=(100, 1)))
        self.b2 = Parameter(torch.normal(mean=0., std=0.1, size=(1, 1)))
        

    def forward(self, x): # define the forward pass
        step1  = torch.matmul(x, self.w1) # matrix multiplication
        step2 = torch.add(step1, self.b1) # addition
        step3 = torch.nn.functional.relu(step2) # ReLU # rectified linear unit
        step4 = torch.matmul(step3, self.w2)
        out = torch.add(step4, self.b2)
        return out
    
model = MyOwnMLP() # create an instance of the model

In [32]:
class CustomDataset(Dataset): # inherit from the base class
    def __init__(self, data_x, data_y): # define the constructor
        self.data_x = data_x # store the data
        self.data_y = data_y 

    def __len__(self): # define the length function
        length = len(self.data_x) # get the length of the data
        return length # return the length

    def __getitem__(self, index): # define the get item function
        # Get the x and y at a given position (index) in the data
        x = self.data_x[index] 
        y = self.data_y[index]
        return x, y

In [5]:
# Load the data

# training data
def load_mnist(path, kind='train'):
    """Load MNIST data from `path`"""
    labels_path = path + 'train-labels-idx1-ubyte.gz'
    images_path = path + 'train-images-idx3-ubyte.gz'
    with gzip.open(labels_path, 'rb') as lbpath:
        labels = np.frombuffer(lbpath.read(), dtype=np.uint8,offset=8)
    with gzip.open(images_path, 'rb') as imgpath:
        images = np.frombuffer(imgpath.read(), dtype=np.uint8,offset=16).reshape(len(labels), 784)
    return images, labels

# test data
def load_mnist_test(path, kind='test'):
    """Load MNIST data from `path`"""
    labels_path = path + 't10k-labels-idx1-ubyte.gz'
    images_path = path + 't10k-images-idx3-ubyte.gz'
    with gzip.open(labels_path, 'rb') as lbpath:
        labels_test = np.frombuffer(lbpath.read(), dtype=np.uint8,offset=8)
    with gzip.open(images_path, 'rb') as imgpath:
        images_test = np.frombuffer(imgpath.read(), dtype=np.uint8,offset=16).reshape(len(labels_test), 784)
    return images_test, labels_test

In [6]:
X_train, y_train = load_mnist('./data/FashionMNIST/raw/', kind='train')
X_test, y_test = load_mnist_test('./data/FashionMNIST/raw/', kind='t10k')

In [7]:
print(f"X_train: {X_train.shape},y_train: {y_train.shape}, X_test: {X_test.shape}, y_test: {y_test.shape}")

X_train: (60000, 784),y_train: (60000,), X_test: (10000, 784), y_test: (10000,)


In [8]:
# copy the array to protect its data and make it writable before converting it to a tensor

X_train2 = np.copy(X_train)
X_test2 = np.copy(X_test)
y_train2 = np.copy(y_train)
y_test2 = np.copy(y_test)

In [34]:
X_train2.flags.writeable = True
X_test2.flags.writeable = True
y_train2.flags.writeable = True
y_test2.flags.writeable = True

In [46]:
torch_data = torch.from_numpy(X_train2).float()
torch_labels = torch.from_numpy(y_train2)
out = model(torch_data)
print(out.shape)

torch.Size([60000, 1])


In [47]:
torch_labels.bincount()

tensor([6000, 6000, 6000, 6000, 6000, 6000, 6000, 6000, 6000, 6000])

In [10]:
# Loop and wait for each data point in PyTorch
dataset = CustomDataset(data_x=X_train2, data_y=y_train2) # create an instance of the dataset
dataloader = DataLoader(dataset=dataset, batch_size=10, num_workers=0) # create an instance of the data loader
start = time.time() # start the timer
for point in dataloader: # loop over the data
    pass
print('Done in pytorch : ', time.time() - start)

Done in pytorch :  0.2186579704284668


In [11]:
device = 'gpu' if torch.cuda.is_available() else 'cpu' # check if cuda is available
torch_xs = torch_data.to(device) # move the data to the device

In [12]:
n_epochs = 200 # the number of epochs

model = MyOwnMLP() # create an instance of the model
model = model.to(device) # move the model to the device
opt = torch.optim.Adam(model.parameters(), lr=0.01) # the optimizer with the parameters to optimize and the learning rate

dataset = CustomDataset(data_x=torch_data, data_y=torch_labels) # create an instance of the dataset
dataloader = DataLoader(dataset=dataset, batch_size=10, num_workers=0) # create an instance of the data loader

for epoch in range(n_epochs): # loop over the epochs
    for batch_x, batch_y in dataloader: # loop over the data
        # Don't forget to send to device, the rest is similar to what we had above
        batch_x = batch_x.to(device) 
        batch_y = batch_y.to(device) 
        opt.zero_grad() 
        loss = torch.mean((model(batch_x) - batch_y) ** 2) # compute the loss
        loss.backward() # compute the gradients
        opt.step() # update the parameters
    if epoch % 10 == 0: # print the loss every 10 iterations
        print(epoch, loss.item())

# Let's plot our trained model
torch_lsp = torch.from_numpy(X_train2).float()[:, None] 
# torch_lsp

0 7.04637336730957
10 7.28519344329834
20 7.28519344329834
30 7.28519344329834
40 7.28519344329834
50 7.28519344329834
60 7.28519344329834
70 7.28519344329834
80 7.28519344329834
90 7.28519344329834
100 7.28519344329834
110 7.28519344329834
120 7.28519344329834
130 7.28519344329834
140 7.28519344329834
150 7.28519344329834
160 7.28519344329834
170 7.28519344329834
180 7.28519344329834
190 7.28519344329834


tensor([[[0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.]],

        ...,

        [[0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.]]])

In [13]:
# To easily use the trained model we need to send it back to cpu at the end
model = model.to('cpu') # move the model to the cpu

In [None]:
labels = {0: 'T-shirt/top',
          1: 'Trouser',
          2: 'Pullover',
          3: 'Dress',
          4: 'Coat',
          5: 'Sandal',
          6: 'Shirt',
          7: 'Sneaker',
          8: 'Bag',
          9: 'Ankle Boot'
         }

In [51]:
from sklearn.metrics import accuracy_score