In [None]:
# Imports for pytorch
import numpy as np
import torch
import torchvision
from torch import nn
import matplotlib
from matplotlib import pyplot as plt
import tqdm

In [None]:
# Creating the datasets
import os
import warnings
from modulefinder import Module
import torchvision

SEED = 1234

# Seed every random number generator used below (NumPy, PyTorch CPU, PyTorch CUDA)
# and ask cuDNN for deterministic kernels.
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

# Load the raw training split (no transform) only to measure its pixel statistics.
unprocessed_train_data = torchvision.datasets.FashionMNIST(root="data", train=True, download=True)

# Raw pixels are stored on a 0-255 scale; dividing by 255 expresses the
# statistics on the 0-1 scale that ToTensor() produces.
mean = unprocessed_train_data.data.float().mean() / 255
sd = unprocessed_train_data.data.float().std() / 255

# Training pipeline: small random rotation and padded random crop as augmentation,
# then conversion to a tensor and standardisation.
train_transforms = torchvision.transforms.Compose(
    [
        torchvision.transforms.RandomRotation(5, fill=(0,)),
        torchvision.transforms.RandomCrop(28, padding=2),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=[mean], std=[sd]),
    ]
)

# Evaluation pipeline: no augmentation, same standardisation.
test_transforms = torchvision.transforms.Compose(
    [
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=[mean], std=[sd]),
    ]
)

training_data = torchvision.datasets.FashionMNIST(
    root="data", train=True, download=True, transform=train_transforms
)

test_data = torchvision.datasets.FashionMNIST(
    root="data", train=False, download=True, transform=test_transforms
)


Before training a neural network, let's visualize our data. Running the cell below displays the first 9 training images (after the training transforms have been applied) in a 3-by-3 grid.

In [None]:
from matplotlib import pyplot as plt
images = [training_data[i][0] for i in range(9)]

# The dataset returns standardised tensors, whose values fall well outside [0, 1].
# imshow clips float RGB data to [0, 1], so undo the normalisation first;
# otherwise every bright pixel saturates to white.
displayable = (torch.stack(images) * sd + mean).clamp(0, 1)

# make_grid returns a (channels, height, width) tensor; imshow expects
# (height, width, channels), hence the transpose.
grid = torchvision.utils.make_grid(displayable, nrow=3, padding=5)
plt.imshow(grid.numpy().transpose((1, 2, 0)))

In [None]:
import numpy as np
import torch.utils.data as data

# Hold out 10% of the training set for validation. The validation size is the
# remainder rather than a second truncated product: truncating both fractions
# independently can leave the two lengths summing to less than
# len(training_data), which random_split rejects.
n_train_examples = int(len(training_data) * 0.9)
n_val_examples = len(training_data) - n_train_examples

train_data, val_data = data.random_split(training_data, [n_train_examples, n_val_examples])

In [None]:
# Report how many examples ended up in each split.
n_train, n_val, n_test = len(train_data), len(val_data), len(test_data)
print(f'Number of training examples: {n_train}')
print(f'Number of validation examples: {n_val}')
print(f'Number of testing examples: {n_test}')

In [None]:
import copy
# train_data and val_data were both split from training_data, so work on a deep
# copy here: the transform swap below must not also change what train_data sees.
val_data = copy.deepcopy(val_data)
# Validation examples should not be augmented: use the evaluation transforms
# (ToTensor + Normalize only) instead of the training transforms.
val_data.dataset.transform = test_transforms

In [None]:
BATCH_SIZE = 64

# Only the training loader shuffles; validation and test batches keep dataset order.
train_iterator = data.DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
val_iterator = data.DataLoader(val_data, batch_size=BATCH_SIZE)
test_iterator = data.DataLoader(test_data, batch_size=BATCH_SIZE)

# Build Model Architecture

In [None]:
import torch.nn.functional as func


class MLP(nn.Module):
    """Fully connected classifier with two ReLU-activated hidden layers.

    Each sample is flattened to a vector, passed through linear layers of
    250 and 100 units (each followed by ReLU), and then through a final
    linear layer that produces ``output_dim`` values.
    """

    def __init__(self, input_dim, output_dim):
        """Create the three linear layers.

        Args:
            input_dim: number of features in one flattened sample.
            output_dim: number of values produced by the output layer.
        """
        super().__init__()

        self.input_layer = nn.Linear(input_dim, 250)
        self.hidden_layer = nn.Linear(250, 100)
        self.output_layer = nn.Linear(100, output_dim)

    def forward(self, x):
        """Compute the network output for a batch.

        Args:
            x: tensor whose first dimension is the batch; all remaining
                dimensions are flattened into one feature vector per sample.

        Returns:
            A tuple ``(y_pred, h_2)``: the output-layer values and the
            activations of the second hidden layer.
        """
        # Collapse everything except the batch dimension.
        flat = x.view(x.shape[0], -1)

        first_hidden = func.relu(self.input_layer(flat))
        second_hidden = func.relu(self.hidden_layer(first_hidden))

        return self.output_layer(second_hidden), second_hidden

In [None]:
# One input feature per pixel of a 28x28 image; ten output values.
INPUT_DIM = 28 * 28
OUTPUT_DIM = 10

model = MLP(input_dim=INPUT_DIM, output_dim=OUTPUT_DIM)

In [None]:
import torch.optim as optim

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Move the model (and the loss module) to the target device first ...
model = model.to(device)
CEL = nn.CrossEntropyLoss().to(device)

# ... and only then build the optimizer, so it is constructed from the
# parameters as they live on the training device.
optimizer = optim.Adam(model.parameters())

In [None]:
def calculate_accuracy(y_pred, y):
    """Return the fraction of rows whose highest-scoring column matches the label.

    Args:
        y_pred: score tensor; the prediction for each row is the argmax over
            dimension 1.
        y: label tensor with one entry per row of ``y_pred``.

    Returns:
        A zero-dimensional float tensor: matches divided by ``y.shape[0]``.
    """
    predicted = y_pred.argmax(dim=1, keepdim=True)
    # Reshape the labels to the prediction's shape so they compare element-wise.
    n_matches = (predicted == y.view_as(predicted)).sum()
    return n_matches.float() / y.shape[0]



In [None]:
from tqdm.notebook import trange, tqdm
def train(model, iterator, optimizer, CEL, device):
    """Run one training pass over ``iterator`` and return the epoch metrics.

    Args:
        model: module called as ``model(x)``; must return a tuple whose first
            element holds the predictions.
        iterator: iterable yielding ``(x, y)`` batches.
        optimizer: optimizer updating ``model``'s parameters.
        CEL: loss callable invoked as ``CEL(y_pred, y)``. It is assumed to
            return the mean loss over the batch (the default reduction of
            ``nn.CrossEntropyLoss``).
        device: device each batch is moved to before the forward pass.

    Returns:
        ``(loss, accuracy)`` averaged over every example seen in the epoch.

    Raises:
        ZeroDivisionError: if ``iterator`` yields no examples.
    """
    total_loss = 0.0
    total_correct = 0.0
    total_examples = 0

    model.train()

    for (x, y) in tqdm(iterator, desc="Training", leave=False):

        x = x.to(device)
        y = y.to(device)
        n_examples = y.shape[0]

        optimizer.zero_grad()

        y_pred, _ = model(x)

        loss = CEL(y_pred, y)

        accuracy = calculate_accuracy(y_pred, y)

        loss.backward()

        optimizer.step()

        # Weight each batch by its size: the last batch is usually smaller, and
        # a plain mean of per-batch means would give its examples extra weight.
        total_loss += loss.item() * n_examples
        total_correct += accuracy.item() * n_examples
        total_examples += n_examples

    return total_loss / total_examples, total_correct / total_examples


def evaluate(model, iterator, CEL, device):
    """Evaluate ``model`` on ``iterator`` without updating it.

    Args:
        model: module called as ``model(x)``; must return a tuple whose first
            element holds the predictions.
        iterator: iterable yielding ``(x, y)`` batches.
        CEL: loss callable invoked as ``CEL(y_pred, y)``. It is assumed to
            return the mean loss over the batch (the default reduction of
            ``nn.CrossEntropyLoss``).
        device: device each batch is moved to before the forward pass.

    Returns:
        ``(loss, accuracy)`` averaged over every example seen.

    Raises:
        ZeroDivisionError: if ``iterator`` yields no examples.
    """
    total_loss = 0.0
    total_correct = 0.0
    total_examples = 0

    model.eval()

    # No gradients are needed for evaluation.
    with torch.no_grad():

        for (x, y) in tqdm(iterator, desc="Evaluating", leave=False):

            x = x.to(device)
            y = y.to(device)
            n_examples = y.shape[0]

            y_pred, _ = model(x)

            loss = CEL(y_pred, y)

            accuracy = calculate_accuracy(y_pred, y)

            # Weight each batch by its size: the last batch is usually smaller,
            # and a plain mean of per-batch means would over-weight its examples.
            total_loss += loss.item() * n_examples
            total_correct += accuracy.item() * n_examples
            total_examples += n_examples

    return total_loss / total_examples, total_correct / total_examples

In [None]:

EPOCHS = 10

# Lowest validation loss seen so far; starting at +inf means any finite
# first-epoch loss counts as an improvement.
best_val_loss = float('inf')

# Per-epoch history, plotted once training has finished.
train_accuracies, val_accuracies = [], []
train_losses, val_losses = [], []

for epoch in trange(EPOCHS):

    train_loss, train_accuracy = train(model, train_iterator, optimizer, CEL, device)
    val_loss, val_accuracy = evaluate(model, val_iterator, CEL, device)

    train_losses.append(train_loss)
    train_accuracies.append(train_accuracy)
    val_losses.append(val_loss)
    val_accuracies.append(val_accuracy)

    # Save the weights whenever the validation loss improves.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), 'tut1-model.pt')

    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_accuracy*100:.2f}%')
    print(f'\t Val. Loss: {val_loss:.3f} |  Val. Acc: {val_accuracy*100:.2f}%')


# Epochs are numbered from 1 on the x-axis of both figures.
epoch_axis = range(1, EPOCHS + 1)

# Figure 1: accuracy curves.
plt.plot(epoch_axis, train_accuracies, label='Train Accuracy')
plt.plot(epoch_axis, val_accuracies, label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Train/Validation Accuracy')
plt.legend()
plt.show()

# Figure 2: loss curves.
plt.plot(epoch_axis, train_losses, label='Train Loss')
plt.plot(epoch_axis, val_losses, label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Train/Validation Loss')
plt.legend()
plt.show()

In [None]:
# Display the training accuracy from the last epoch that was run.
train_accuracy