# **SIN 393 – Introduction to Computer Vision (2024)**

# Lecture 06 - Part 1 - Convolutional Neural Networks

Prof. João Fernando Mari ([*joaofmari.github.io*](https://joaofmari.github.io/))

---

## Mounting Google Drive
---

* If you are running on Google Colab, don't forget to enable GPU access.
    * Edit >> Laptop Settings >> Hardware Accelerator
    * Select GPU
    * OK
* After use, disable access.

In [1]:
try:
    import google.colab
    IN_COLAB = True
except:
    IN_COLAB = False

# DEBUG
print(IN_COLAB)

if IN_COLAB:
    from google.colab import drive
    drive.mount('/content/drive')

False


## Importing the required libraries
---

In [2]:
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms

%matplotlib notebook

## Checking GPU Access
---

In [3]:
# Check if the GPU is available
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('\nDevice: {0}'.format(DEVICE))


Device: cuda


In [4]:
!nvidia-smi

Thu Nov 28 08:27:23 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.183.01             Driver Version: 535.183.01   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce GTX 1050 Ti     Off | 00000000:01:00.0 Off |                  N/A |
| 45%   23C    P8              N/A /  75W |      8MiB /  4096MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

## Settings for reproducibility
---

In [5]:
np.random.seed(1234)

## Setting some hyperparameters
---

In [6]:
# Class names
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# Batch size (mini-batch size)
batch_size = 8 # 4

# Number of training epochs
epochs = 50 # Use a small value during development, 2 for example.

## The dataset
---

In [7]:
# Define uma sequencia de transformações que serão aplicadas sobre as imagens dos datasets
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# Datasets
# --------
# Training set
dataset_train = torchvision.datasets.CIFAR10(root='./data', train=True,
                                             download=True, transform=transform)
# Test set
dataset_test = torchvision.datasets.CIFAR10(root='./data', train=False,
                                            download=True, transform=transform)

# Dataloaders
# -----------
# Training set
dataloader_train = torch.utils.data.DataLoader(dataset_train, batch_size=batch_size,
                                               shuffle=True, num_workers=2)
# Test set
dataloader_test = torch.utils.data.DataLoader(dataset_test, batch_size=batch_size,
                                              shuffle=False, num_workers=2)

Files already downloaded and verified
Files already downloaded and verified


In [8]:
print(len(dataset_train))
print(len(dataset_test))

print(len(dataloader_train))
print(len(dataloader_test))

50000
10000
6250
1250


## Defining a simple Convolutional Neural Network
---

In [9]:
class Net(nn.Module):
    """
    Considering each image having 32 x 32 x 3:

    Input [3, 32, 32] 
    Conv1(3, 6, 5) [6, 28, 28] 
    Pool(2, 2) [6, 14, 14] 
    Conv2(6, 16, 5) [16, 10, 10]
    Pool(2, 2) [16, 5, 5]
    Flatten [400]
    Fc1 [120]
    Fc2 [84]
    Fc3 [10]
    """
    def __init__(self):
        """
        torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, 
                        dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)
        torch.nn.MaxPool2d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)
        torch.nn.Linear(in_features, out_features, bias=True, device=None, dtype=None)
        """
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [10]:
# Instantiates an object of the Net class
net = Net()

# Send model to GPU
net = net.cuda() 

print(net)

Net(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


## Loss function and optimizer

In [11]:
# Loss function - Cross entropy
criterion = nn.CrossEntropyLoss()

# Optimizator - Stochastic Gradient Descent
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

## Training the model
---

In [12]:
# Iterates over the dataset for a number of epochs.
for epoch in range(epochs):  

    epoch_loss = 0.0

    # Training
    # --------
    for i, (inputs, labels) in enumerate(dataloader_train, 0):
        # Send data to GPU
        inputs = inputs.to(DEVICE) 
        labels = labels.to(DEVICE) 

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward 
        outputs = net(inputs)
        # Compute the loss function
        loss = criterion(outputs, labels)
        
        # Backward
        loss.backward()
        
        # Optimizes the parameters (weights)
        optimizer.step()
        
        # Update epoch loss
        epoch_loss += loss.item()

    epoch_loss = epoch_loss / i
    print(f'Epoch {epoch + 1}: {epoch_loss:.4f}')
    
    epoch_loss = 0.0

print('\nTraining finished!')

Epoch 1: 1.8734
Epoch 2: 1.4613
Epoch 3: 1.2794
Epoch 4: 1.1752
Epoch 5: 1.0958
Epoch 6: 1.0254
Epoch 7: 0.9727
Epoch 8: 0.9216
Epoch 9: 0.8824
Epoch 10: 0.8432
Epoch 11: 0.8071
Epoch 12: 0.7772
Epoch 13: 0.7423
Epoch 14: 0.7217
Epoch 15: 0.6936
Epoch 16: 0.6715
Epoch 17: 0.6507
Epoch 18: 0.6320
Epoch 19: 0.6092
Epoch 20: 0.5884
Epoch 21: 0.5757
Epoch 22: 0.5605
Epoch 23: 0.5413
Epoch 24: 0.5267
Epoch 25: 0.5146
Epoch 26: 0.5044
Epoch 27: 0.4960
Epoch 28: 0.4859
Epoch 29: 0.4775
Epoch 30: 0.4579
Epoch 31: 0.4489
Epoch 32: 0.4468
Epoch 33: 0.4370
Epoch 34: 0.4374
Epoch 35: 0.4272
Epoch 36: 0.4289
Epoch 37: 0.4107
Epoch 38: 0.4103
Epoch 39: 0.4030
Epoch 40: 0.4004
Epoch 41: 0.3988
Epoch 42: 0.3904
Epoch 43: 0.3854
Epoch 44: 0.3735
Epoch 45: 0.3845
Epoch 46: 0.3835
Epoch 47: 0.3779
Epoch 48: 0.3804
Epoch 49: 0.3827
Epoch 50: 0.3719

Training finished!


## Evaluating the model over the test set
---

In [13]:
# Number of correctly classified images
correct = 0
# Total number of images
total = 0

# It is not necessary to calculate the gradients.
with torch.no_grad():
    for inputs, labels in dataloader_test:
        
        # Send data to GPU
        inputs = inputs.to(DEVICE) 
        labels = labels.to(DEVICE) 
        
        # Forward
        outputs = net(inputs)

        # Prediction
        _, predicted = torch.max(outputs.data, 1)

        # Update the number of images
        total += labels.size(0)
        # Updates the number of correct classifications
        correct += (predicted == labels).sum().item()

# Compute the accuracy over the test set
accuracy = 100 * correct / total

print(f'Network accuracy over the test set: {accuracy:.4f} %')

Network accuracy over the test set: 60.6200 %


## Bibliography
---
* PyTorch. Training a Classifier
    * https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html
* Microsoft. Train your image classifier model with PyTorch.
    * https://learn.microsoft.com/en-us/windows/ai/windows-ml/tutorials/pytorch-train-model