In [1]:
#import libraries and functions to load the data
from matplotlib import pyplot as plt

import os
import time

import numpy as np
import torch
import torch.nn.functional as F
import torchvision
# from torchvision.datasets import FashionMNIST as mnist

### Select Device (cpu, cuda, mps)

In [10]:
#for nvidia 
has_gpu = torch.cuda.is_available()
#for mac
has_mps = getattr(torch,'has_mps',False)
device = "mps" if getattr(torch,'has_mps',False) \
    else "cuda" if torch.cuda.is_available() else "cpu"

print("GPU is", "available" if has_gpu else "NOT AVAILABLE")
print("MPS (Apple Metal) is", "AVAILABLE" if has_mps else "NOT AVAILABLE")
print(f"Target device is {device}")

## Some Hyperparameters 
random_seed = 1
batch_size = 128
learning_rate = 0.01
num_epochs = 15
num_classes = 10

GPU is available
MPS (Apple Metal) is NOT AVAILABLE
Target device is cuda


### Creating Data Transforms for Preprocessing Data from MNIST

In [3]:
from torchvision import transforms, datasets
from torch.utils.data.dataset import random_split
from torchvision.datasets import ImageFolder

train_data_transform = transforms.Compose(
    [
        transforms.Resize(32),
        transforms.RandomCrop((28,28)),
        transforms.ToTensor(),
        transforms.Normalize(0.5,0.5)
    ]
)
test_data_transform = transforms.Compose(
    [
        transforms.Resize(32),
        transforms.CenterCrop((28,28)),
        transforms.ToTensor(),
        transforms.Normalize(0.5,0.5)
    ]
)

### Downloand the MNIST Dataset

In [4]:
#train_dataset = ImageFolder(root="mnist-imgs/train", transform=train_data_transform)
#test_dataset = ImageFolder(root="mnist-imgs/test", transform=test_data_transform)

train_dataset = datasets.FashionMNIST('./FashionMNIST/', train=True, transform=train_data_transform, download=True)
test_dataset = datasets.FashionMNIST('./FashionMNIST/', train=False, transform=test_data_transform, download=True)

train_dataset, valid_dataset = random_split(train_dataset, lengths=[55000, 5000])

### Creating DataLoaders to get mini-batches

In [5]:
from torch.utils.data import DataLoader
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    drop_last=True,
    num_workers=3,
    shuffle=True
)
valid_loader = DataLoader(
    dataset=valid_dataset,
    batch_size=batch_size,
    drop_last=False,
    num_workers=3,
    shuffle=False
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    drop_last=False,
    num_workers=3,
    shuffle=False
)

### Visualize the Data

### Create the Convolutional Neural Network Model

In [6]:
class CNN(torch.nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        
        self.num_classes = num_classes
        self.cnn_features = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3, stride=1, padding=1),
            torch.nn.BatchNorm2d(8),
            torch.nn.ReLU(inplace=True),
            torch.nn.Conv2d(in_channels=8, out_channels=8, kernel_size=3, stride=1, padding=1),
            torch.nn.BatchNorm2d(8),
            
            torch.nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, stride=1, padding=1),
            torch.nn.BatchNorm2d(16),
            torch.nn.ReLU(inplace=True),
            torch.nn.Conv2d(in_channels=16, out_channels=16, kernel_size=3, stride=1, padding=1),
            torch.nn.BatchNorm2d(16),
            
            
            # torch.nn.Conv2d(in_channels=16, out_channels=16, kernel_size=3, stride=1, padding=1),
            # torch.nn.BatchNorm2d(16)
            # torch.nn.ReLU(inplace=True),
            # torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0),

            # torch.nn.Conv2d(in_channels=16, out_channels=16, kernel_size=3, stride=1, padding=1),
            # torch.nn.ReLU(inplace=True),
            torch.nn.AvgPool2d(kernel_size=3, stride=2, padding=0),
        )
        self.fc_features = torch.nn.Sequential(
            # torch.nn.AdaptiveAvgPool2d(1),
            torch.nn.Flatten(),
            torch.nn.Linear(2704, 100),
            torch.nn.ReLU(inplace=True),
            torch.nn.Linear(100, num_classes)
        )
        
    def forward(self, x):
        x = self.cnn_features(x)
        x = self.fc_features(x)
        return x

### Helper function for Accuracy

In [7]:
def accuracy(model, data_loader, device):
    with torch.no_grad():
        model = model.train()
        true_pred = 0
        tot_samples = 0
        for imgs, labels in data_loader:
            imgs = imgs.to(device)
            labels = labels.to(device)
            logits = model(imgs)
            _, label_pred = torch.max(logits, axis=1)
            true_pred += (label_pred==labels).sum()
            tot_samples += labels.shape[0]
        acc = (true_pred/float(tot_samples))*100
    return acc

### Training Loop

In [11]:
torch.manual_seed(random_seed)
model = CNN(num_classes=num_classes)
model = model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.5)
epoch_loss = []

start = time.time()
for epoch in range(num_epochs):
    model = model.train()
    loss_list = []
    for batch_idx, (imgs, labels) in enumerate(train_loader):
        imgs = imgs.to(device)
        labels = labels.to(device)
        
        ## Forward Propagation - extract features and classify
        logits = model(imgs)
        loss = F.cross_entropy(logits, labels)
        
        loss_list.append(float(loss))

        #zero out the gradients
        optimizer.zero_grad()
        #estimate new gradients
        loss.backward()
        #update parameters
        optimizer.step()
        
        if not (batch_idx + 1) % 100:
            print(
                f"Epoch: {epoch + 1:03d}/{num_epochs:03d} | "
                f"Batch: {batch_idx + 1:03d}/{len(train_loader):03d} | "
                f"Loss: {loss:.4f}"
            )
    mean_loss = round(np.mean(loss_list),5)
    epoch_loss.append(mean_loss)
    # Tracking the Learning Rate Scheduler
    prev_lr = optimizer.param_groups[0]["lr"]
    scheduler.step()
    current_lr = optimizer.param_groups[0]["lr"]
    print(f"Epoch: {epoch+1:03d} Learning Rate {prev_lr:.8f} -> {current_lr:.8f}")


    # Evaluate Performance after each epoch
    model = model.eval()
    tr_acc = accuracy(model, train_loader, device)
    valid_acc = accuracy(model, valid_loader, device)
    print(f"Train Accuracy: {tr_acc:0.3f}")
    print(f"Validation Accuracy: {valid_acc:0.3f}")
    print(f"Time elapsed so far: {(time.time() - start) / 60:.2f} min")

print(f"Total Train Time: {(time.time() - start) / 60:.2f} min")


Epoch: 001/015 | Batch: 100/429 | Loss: 0.5877
Epoch: 001/015 | Batch: 200/429 | Loss: 0.5758
Epoch: 001/015 | Batch: 300/429 | Loss: 0.4897
Epoch: 001/015 | Batch: 400/429 | Loss: 0.4552
Epoch: 001 Learning Rate 0.01000000 -> 0.01000000
Train Accuracy: 82.026
Validation Accuracy: 81.760
Time elapsed so far: 1.69 min
Epoch: 002/015 | Batch: 100/429 | Loss: 0.3974
Epoch: 002/015 | Batch: 200/429 | Loss: 0.3647
Epoch: 002/015 | Batch: 300/429 | Loss: 0.4821
Epoch: 002/015 | Batch: 400/429 | Loss: 0.2986
Epoch: 002 Learning Rate 0.01000000 -> 0.00500000
Train Accuracy: 85.539
Validation Accuracy: 85.600
Time elapsed so far: 3.49 min
Epoch: 003/015 | Batch: 100/429 | Loss: 0.2461
Epoch: 003/015 | Batch: 200/429 | Loss: 0.2427
Epoch: 003/015 | Batch: 300/429 | Loss: 0.2657
Epoch: 003/015 | Batch: 400/429 | Loss: 0.2603
Epoch: 003 Learning Rate 0.00500000 -> 0.00500000
Train Accuracy: 87.895
Validation Accuracy: 87.820
Time elapsed so far: 5.32 min
Epoch: 004/015 | Batch: 100/429 | Loss: 0.4

In [12]:
print(epoch_loss)

[0.59508, 0.42037, 0.3396, 0.32214, 0.28674, 0.27491, 0.25423, 0.24645, 0.23286, 0.22714, 0.2182, 0.21622, 0.21432, 0.20892, 0.20706]


In [11]:
epochs = [i for i in range(1,num_epochs + 1)]
plt.plot(epochs, epoch_loss)


[<matplotlib.lines.Line2D at 0x2b333090280>]

: 

: 

In [13]:
model = model.eval()
ts_acc = accuracy(model, test_loader, device)
print(f"Test Accuracy: {ts_acc:0.3f}")

Test Accuracy: 91.350
