In [15]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import numpy as np
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
from sklearn.metrics import classification_report


In [16]:
# Seed PyTorch's global random number generator so reruns start from the same state.
SEED = 42
torch.manual_seed(SEED)

<torch._C.Generator at 0x7c84401661f0>

In [17]:
# Use the GPU when PyTorch reports one as available; otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = "cuda"
else:
  device = "cpu"
print(device)

cuda


In [18]:
# Compose takes a *list* of transforms, so the square brackets are required
# even when the pipeline has a single step.
pipeline_steps = [transforms.ToTensor()]
transform = transforms.Compose(pipeline_steps)

**Loading Data**

In [19]:

# Both splits share every option except the `train` flag.
# NOTE(review): '/data' is an absolute path at the filesystem root; confirm it is
# writable in the environment this notebook is meant to run in.
dataset_options = dict(root='/data', transform=transform, download=True)
train_data = datasets.FashionMNIST(train=True, **dataset_options)
test_data = datasets.FashionMNIST(train=False, **dataset_options)

Splitting the data into train and test batches of 64 images per batch.

In [20]:
batch_size = 64
# Only the training loader is shuffled; the test loader keeps the dataset order.
train_split = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
test_split = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=False)

Defining our neural network: convolutional layers with batch normalization, ReLU activations and max pooling, followed by flattening and fully connected (linear) layers with dropout.

In [21]:
class CNN(nn.Module):
  """Small convolutional image classifier that returns raw class logits.

  Architecture: two (3x3 conv -> batch norm -> ReLU -> 2x2 max-pool) stages
  with 32 and 64 channels, then a flatten and a 256 -> 128 -> ``num_classes``
  fully connected head with dropout after each hidden layer. No softmax is
  applied in ``forward``.

  The default arguments reproduce the originally hard-coded network
  (1-channel 28x28 inputs, 10 classes, dropout 0.5). Layer attribute names are
  unchanged, so existing ``state_dict`` checkpoints stay loadable.

  Args:
    in_channels: number of channels in the input images.
    num_classes: number of output logits.
    image_size: height and width of the (square) input images.
    dropout: drop probability used by both dropout layers.

  Raises:
    ValueError: if ``image_size`` is too small to survive the two 2x2 poolings.
  """

  def __init__(self, in_channels=1, num_classes=10, image_size=28, dropout=0.5):
    super().__init__()

    # The 3x3 convs use stride 1 / padding 1 and keep the spatial size;
    # each 2x2 max-pool (stride 2) floors it in half.
    pooled_size = (image_size // 2) // 2
    if pooled_size < 1:
      raise ValueError(
          f"image_size must be at least 4 to survive two 2x2 poolings, got {image_size}")

    self.conv1 = nn.Conv2d(in_channels, 32, kernel_size=3, stride=1, padding=1)
    self.bn1 = nn.BatchNorm2d(32)

    self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
    self.bn2 = nn.BatchNorm2d(64)

    # One pooling module is shared by both conv stages (it has no parameters).
    self.pool = nn.MaxPool2d(2, 2)
    self.Flatten = nn.Flatten()

    # Flattened feature count: 64 channels x pooled_size x pooled_size
    # (7 * 7 * 64 for the default 28x28 input).
    self.fc1 = nn.Linear(pooled_size * pooled_size * 64, 256)
    self.drop1 = nn.Dropout(dropout)

    self.fc2 = nn.Linear(256, 128)
    self.drop2 = nn.Dropout(dropout)

    self.fc3 = nn.Linear(128, num_classes)

  def forward(self, x):
    """Map a batch of images to logits of shape (batch, num_classes)."""
    x = self.pool(F.relu(self.bn1(self.conv1(x))))
    x = self.pool(F.relu(self.bn2(self.conv2(x))))
    x = self.Flatten(x)
    x = F.relu(self.fc1(x))
    x = self.drop1(x)
    x = F.relu(self.fc2(x))
    x = self.drop2(x)
    x = self.fc3(x)
    return x


Sending the model to the GPU (CUDA), if available, for faster computation.

In [22]:
# Build the network first, then move it to the selected device.
model = CNN()
model = model.to(device)

Defining the loss function and the optimizer for our model.

In [23]:
# Cross-entropy loss applied to the logits returned by the model.
loss_fn = nn.CrossEntropyLoss()
# Adam over all model parameters, with a small weight decay.
adam_settings = {"lr": 0.001, "weight_decay": 1e-4}
optimizer = torch.optim.Adam(model.parameters(), **adam_settings)

Defining a reusable training function and a matching evaluation function. Both return the average loss and accuracy; the evaluation function also returns a classification report.

In [24]:
def train_step(model, data_loader, loss_fn, optimizer, device):
  """Train ``model`` for one pass over ``data_loader``.

  Loss and accuracy are averaged per *sample actually seen*, so a smaller final
  batch is not over-weighted and the result stays correct when the loader
  drops or subsamples data.

  Args:
    model: the ``nn.Module`` to optimise; it is put into training mode.
    data_loader: iterable yielding ``(X, y)`` batches.
    loss_fn: callable ``loss_fn(logits, y)``; assumed to return the batch *mean*
      loss (the default reduction of ``nn.CrossEntropyLoss``).
    optimizer: optimizer stepping ``model``'s parameters.
    device: device the batches are moved to before the forward pass.

  Returns:
    ``(avg_loss, avg_acc)`` as Python floats.

  Raises:
    ValueError: if ``data_loader`` yields no samples.
  """
  model.train()
  loss_sum = 0.0
  correct = 0
  seen = 0
  for X, y in data_loader:
    X, y = X.to(device), y.to(device)
    y_pred = model(X)
    loss = loss_fn(y_pred, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    batch_count = y.size(0)
    # Undo the per-batch mean so every sample carries equal weight in the average.
    loss_sum += loss.item() * batch_count
    correct += (y_pred.argmax(dim=1) == y).sum().item()
    seen += batch_count

  if seen == 0:
    raise ValueError("data_loader yielded no samples")

  avg_loss = loss_sum / seen
  avg_acc = correct / seen

  return avg_loss, avg_acc

In [25]:
def test_step(model, data_loader, loss_fn, device):
  model.eval()
  total_loss = 0
  correct = 0
  preds = []
  true = []
  with torch.inference_mode():
    for X, y in data_loader:
      X, y = X.to(device), y.to(device)
      y_pred = model(X)
      loss = loss_fn(y_pred, y)
      total_loss += loss.item()
      predicted = y_pred.argmax(dim=1)
      correct += (predicted == y).sum().item()
      preds.extend(predicted.cpu().numpy())
      true.extend(y.cpu().numpy())

  avg_loss = total_loss / len(data_loader)
  avg_acc = correct / len(data_loader.dataset)
  report = classification_report(true, preds)
  return avg_loss, avg_acc, report


The actual train/validation loop runs for at most 50 epochs, with early stopping (a patience of 3 epochs) to keep the model from overfitting.

In [26]:

# Train for up to `epochs` epochs, stopping early once the evaluation loss has
# failed to improve for `patience` consecutive epochs. The weights from the
# best epoch are kept and restored afterwards, so `model` does not end up at
# the (worse) weights of the last non-improving epoch.
# NOTE(review): the early-stopping signal comes from `test_split`, so the test
# set doubles as the validation set here; consider a held-out split of the
# training data for model selection.
epochs = 50
patience = 3
best_val_loss = float('inf')
best_state = None  # copy of the state_dict from the best epoch so far
epochs_no_improve = 0
for epoch in range(epochs):
  train_loss, train_acc = train_step(model, train_split, loss_fn, optimizer, device)
  test_loss, test_acc, test_report = test_step(model, test_split, loss_fn, device)
  print(f"epoch: {epoch +1 }")
  print(f"train loss: {train_loss} | train accuracy ={train_acc}")
  print(f"test loss: {test_loss} | test_acc = {test_acc}")
  print(f"test report: {test_report}")

  if test_loss < best_val_loss:
    best_val_loss = test_loss
    epochs_no_improve = 0
    # Clone the tensors: state_dict() hands back references to the live
    # parameters, which the optimizer keeps updating in place.
    best_state = {name: tensor.detach().clone()
                  for name, tensor in model.state_dict().items()}

  else:
    epochs_no_improve += 1
    print("No improvement to the loss")
    if epochs_no_improve >= patience:
      print("Early stopping activated the model started overfitting!")
      break

# Roll back to the best-scoring weights (skipped if no epoch ever improved).
if best_state is not None:
  model.load_state_dict(best_state)

epoch: 1
train loss: 0.5667919671294023 | train accuracy =0.7969833333333334
test loss: 0.34356019793042714 | test_acc = 0.8754
test report:               precision    recall  f1-score   support

           0       0.84      0.80      0.82      1000
           1       1.00      0.96      0.98      1000
           2       0.86      0.75      0.80      1000
           3       0.80      0.95      0.87      1000
           4       0.78      0.77      0.77      1000
           5       0.98      0.97      0.98      1000
           6       0.64      0.69      0.66      1000
           7       0.94      0.96      0.95      1000
           8       0.99      0.95      0.97      1000
           9       0.96      0.95      0.96      1000

    accuracy                           0.88     10000
   macro avg       0.88      0.88      0.88     10000
weighted avg       0.88      0.88      0.88     10000

epoch: 2
train loss: 0.37337280330119105 | train accuracy =0.8705333333333334
test loss: 0.300375313