# Import Libraries 

In [1]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torchinfo import summary
import models as cifar_models
import torch.optim as optim


# Select the compute device (CUDA, Apple MPS, or CPU)

In [2]:
# Pick the best available backend: CUDA first, then Apple MPS, otherwise the CPU.
# `is_gpu_available` records whether an accelerator was selected.
if torch.cuda.is_available():
    print("CUDA is available")
    device, is_gpu_available = torch.device("cuda:0"), True
elif torch.backends.mps.is_available():
    print("Apple MPS is available")
    device, is_gpu_available = torch.device("mps:0"), True
else:
    device, is_gpu_available = torch.device("cpu"), False

# Tensors created without an explicit device now land on the selected device
torch.set_default_device(device)

Apple MPS is available


# Download and prepare the dataset

In [56]:
# Mini-batch size actually used by both loaders: larger when an accelerator is available
batch_size = 128 if is_gpu_available else 64

# Per-channel mean / std used to normalise the CIFAR-10 images
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2470, 0.2435, 0.2616)

# Train Phase transformations
train_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
])

# Test Phase transformations
test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
])

# Data
train_data = datasets.CIFAR10('./data', train=True, download=True, transform=train_transforms)
test_data  = datasets.CIFAR10('./data', train=False, download=True, transform=test_transforms)

# Data loaders: worker processes and pinned memory are only requested when an accelerator is in use
dataloader_args = dict(shuffle=True, batch_size=batch_size)
if is_gpu_available:
    dataloader_args.update(num_workers=4, pin_memory=True)

# train dataloader (shuffled every epoch)
train_data_loader = torch.utils.data.DataLoader(train_data, **dataloader_args)
# test dataloader: same settings, but evaluation does not need shuffling
test_data_loader = torch.utils.data.DataLoader(test_data, **{**dataloader_args, "shuffle": False})

# Class names in label-index order, as given by the dataset's class_to_idx mapping:
# {'airplane': 0, 'automobile': 1, 'bird': 2, 'cat': 3, 'deer': 4, 'dog': 5, 'frog': 6, 'horse': 7, 'ship': 8, 'truck': 9}
classes = ('airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


# Model

# Summary

In [4]:
# Build the network and move its parameters to the selected device
model = cifar_models.NetBN()
model = model.to(device)

# Layer-by-layer summary for a single 3x32x32 input (batch size 1)
summary(model, input_size=(1, 3, 32, 32))

Layer (type:depth-idx)                   Output Shape              Param #
NetBN                                    [16, 10]                  --
├─Conv2d: 1-1                            [1, 32, 32, 32]           896
├─Conv2d: 1-2                            [1, 64, 32, 32]           18,496
├─Conv2d: 1-3                            [1, 64, 32, 32]           4,160
├─MaxPool2d: 1-4                         [1, 64, 16, 16]           --
├─Conv2d: 1-5                            [1, 128, 16, 16]          73,856
├─Conv2d: 1-6                            [1, 256, 16, 16]          295,168
├─Conv2d: 1-7                            [1, 512, 16, 16]          1,180,160
├─Conv2d: 1-8                            [1, 512, 16, 16]          262,656
├─MaxPool2d: 1-9                         [1, 512, 8, 8]            --
├─Conv2d: 1-10                           [1, 1024, 8, 8]           4,719,616
├─Conv2d: 1-11                           [1, 2048, 8, 8]           18,876,416
├─Conv2d: 1-12                           

# Define a loss function and optimizer

In [7]:
# Optimiser hyper-parameters
LEARNING_RATE = 0.001
MOMENTUM = 0.9

# Negative log-likelihood loss
# NOTE(review): NLLLoss expects log-probabilities — confirm NetBN ends in log_softmax
criterion = nn.NLLLoss()

# Stochastic gradient descent with momentum over every model parameter
optimiser = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM)

## Train

In [53]:
def train(
    model: 'torch.nn.Module', 
    train_data_loader: 'torch.utils.data.DataLoader', 
    criterion: 'torch.nn.NLLLoss', 
    optimiser: 'torch.optim.Optimizer', 
    epoch, 
    device = torch.device("cpu")
):
    """Run one full pass over ``train_data_loader`` and update ``model``.

    Args:
        model: Network to optimise; it is moved to ``device`` and put in training mode.
        train_data_loader: Iterable of batches where item 0 holds the inputs and
            item 1 holds the labels.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimiser: Optimiser that owns the parameters of ``model``.
        epoch: Accepted for the caller's convenience; not used inside the function.
        device: Device on which the model and each batch are placed.

    Returns:
        The sum of the per-batch loss values (``loss.item()``) over the whole pass.
    """
    model.to(device)
    model.train()

    epoch_loss = 0.0
    for batch in train_data_loader:
        inputs = batch[0].to(device)
        labels = batch[1].to(device)

        # Gradients accumulate across backward passes, so clear them before each step
        optimiser.zero_grad()

        # Forward pass, loss, backward pass, parameter update
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimiser.step()

        epoch_loss += loss.item()

    return epoch_loss


## Test

In [54]:
def test(
    model: 'torch.nn.Module', 
    test_data_loader: 'torch.utils.data.DataLoader', 
    # criterion: 'torch.nn.NLLLoss', 
    # optimiser: 'torch.optim.Optimizer', 
    epoch, 
    device = torch.device("cpu")
):
    model.to(device)
    correct = 0
    total = 0

    # prepare to count predictions for each class
    correct_pred = {classname: 0 for classname in classes}
    total_pred = {classname: 0 for classname in classes}

    model.eval()
    
    with torch.no_grad():
        for batch_idx, data in enumerate(test_data_loader):
            images, labels = data[0].to(device), data[1].to(device)
            
            # calculate outputs by running images through the network
            outputs = net(images)
            
            # the class with the highest energy is what we choose as prediction
            _, predictions = torch.max(outputs.data, 1)
            # collect the correct predictions for each class
            for label, prediction in zip(labels, predictions):
                if label == prediction:
                    correct_pred[classes[label]] += 1
                total_pred[classes[label]] += 1

            total += labels.size(0)
            correct += (predictions == labels).sum().item()

    # print accuracy for each class
    for classname, correct_count in correct_pred.items():
        accuracy = 100 * float(correct_count) / total_pred[classname]
        print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')
    
    print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')


In [51]:
# Scratch cell: explore the tensor operations used in the evaluation loop
a = torch.randn(4, 5)
print(a)
print(f"\ndim: {a.dim()} size: {a.size()} shape: {a.shape}")

# Row-wise maximum over dim 1; the result is not stored or displayed here
torch.max(a, 1)

# Two identical all-zero vectors
b = torch.zeros(5)
c = torch.zeros(5)
print(b)
print(c)

# Element-wise equality yields a boolean tensor; summing it counts the matches
matches = b == c
print(matches)
print(matches.sum().item())

tensor([[-0.6575, -0.5631, -1.0503, -1.2959,  0.2355],
        [ 0.9320,  0.1236, -0.1550,  0.6014, -0.1993],
        [ 0.5076,  0.0553,  0.5386, -0.1183, -0.5739],
        [-0.2839, -0.9171, -0.7745,  0.1310, -0.5602]], device='mps:0')

dim: 2 size: torch.Size([4, 5]) shape: torch.Size([4, 5])
tensor([0., 0., 0., 0., 0.], device='mps:0')
tensor([0., 0., 0., 0., 0.], device='mps:0')
tensor([True, True, True, True, True], device='mps:0')
5
