SIN-392 - Introdução ao Processamento Digital de Imagens (2022-1)

# Aula 17 - Redes Neurais Convolucionais

Prof. João Fernando Mari ([*joaofmari.github.io*](https://joaofmari.github.io/))

---
* Este notebook pode ser executado com GPU no Google Colab.
* Para habilitar a GPU, acesse o menu Editar > Configurações de notebook e selecione a opção GPU em Aceleração de hardware.

# Importando as bibliotecas necessárias
---

In [None]:
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms

%matplotlib notebook

# Configurações para reprodutibilidade
---

In [None]:
# Seed every random number generator the notebook draws from.
# Seeding NumPy alone is not enough: the train/validation split, the
# DataLoader shuffling and the layer weight initialisation all use PyTorch's
# generator, so it must be seeded as well for runs to be repeatable.
np.random.seed(1234)
torch.manual_seed(1234)

## Configurações para GPU
---

In [None]:
# Pick the first CUDA device when one is visible; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# Report the selected device (on a CUDA machine this prints a CUDA device).
print(device)

cuda:0


# Definimos alguns hiperparâmetros
---

In [None]:
# Class names (labels)
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

# Mini-batch size used by every DataLoader
batch_size = 4

# O conjunto de dados
---

In [None]:
# Sequence of transformations applied to every image of the datasets:
# convert to a tensor, then normalise each RGB channel. The first
# (0.5, 0.5, 0.5) holds the per-channel means and the second one the
# per-channel standard deviations.
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# Datasets
# --------
# Full training set (kept under its own name so the train/validation subsets
# below do not overwrite it)
dataset_full = torchvision.datasets.CIFAR10(root='./data', train=True,
                                            download=True, transform=transform)
# Test set
dataset_test = torchvision.datasets.CIFAR10(root='./data', train=False,
                                            download=True, transform=transform)

# Number of images in the full training set
print(len(dataset_full))
# Number of images in the test set
print(len(dataset_test))

# Hold out 20% of the training set for validation. The training size is the
# remainder, so the two lengths always add up to len(dataset_full);
# truncating both fractions independently can leave them short, which makes
# random_split raise.
val_size = int(len(dataset_full) * 0.2)
train_size = len(dataset_full) - val_size
dataset_train, dataset_val = torch.utils.data.random_split(dataset_full, [train_size, val_size])

# Number of images in the training subset
print(len(dataset_train))
# Number of images in the validation subset
print(len(dataset_val))

# Dataloaders
# -----------
# Training set: reshuffled every epoch
dataloader_train = torch.utils.data.DataLoader(dataset_train, batch_size=batch_size,
                                               shuffle=True, num_workers=2)

# Validation set: only evaluated, so the sample order does not need shuffling
dataloader_val = torch.utils.data.DataLoader(dataset_val, batch_size=batch_size,
                                             shuffle=False, num_workers=2)
# Test set
dataloader_test = torch.utils.data.DataLoader(dataset_test, batch_size=batch_size,
                                              shuffle=False, num_workers=2)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
50000
10000
40000
10000


## Definindo uma Rede Neural Convolucional simples
---

In [None]:
class Net(nn.Module):
    """Small convolutional classifier.

    Two convolution + ReLU + max-pooling stages feed three fully connected
    layers. `fc1` expects 16 * 5 * 5 features, which is what 32x32 inputs
    produce after the two stages (32 -> 28 -> 14 -> 10 -> 5).

    Args:
        in_channels: number of channels of the input images.
        num_classes: number of scores produced for each image.
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()
        # Feature extractor: two 5x5 convolutions sharing one 2x2 pooling layer.
        self.conv1 = nn.Conv2d(in_channels, 6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        # Classifier head: 400 -> 120 -> 84 -> num_classes.
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=num_classes)

    def forward(self, x):
        """Return the raw (un-normalised) class scores for a batch of images."""
        features = x
        for conv in (self.conv1, self.conv2):
            features = self.pool(F.relu(conv(features)))

        # Collapse every dimension except the batch one.
        hidden = features.flatten(start_dim=1)
        for dense in (self.fc1, self.fc2):
            hidden = F.relu(dense(hidden))

        # No activation on the last layer: the scores are returned as-is.
        return self.fc3(hidden)

In [None]:
# Build the model: 3 input channels, 10 output classes
net = Net(in_channels=3, num_classes=10)

# Show the layer summary
print(net)

# Move the model to the selected device (the GPU when one is available)
net = net.to(device)

Net(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


## Definindo uma função de perda e um otimizador
---

In [None]:
# Optimizer: stochastic gradient descent with momentum over all model parameters
optimizer = optim.SGD(
    net.parameters(),
    lr=0.001,
    momentum=0.9,
)

# Loss function: cross-entropy
criterion = nn.CrossEntropyLoss()

## Treinando o modelo
---

In [None]:
# Number of passes over the training set
num_epochs = 10

for epoch in range(num_epochs):  # loop over the dataset multiple times

    # Training
    # --------
    # Training mode
    net.train()

    # Running sum of the training loss over all samples of this epoch
    train_loss = 0.0

    for inputs, labels in dataloader_train:
        # Send inputs and labels to the selected device
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Zero the parameter gradients left over from the previous step
        optimizer.zero_grad()

        # Forward
        outputs = net(inputs)

        # Compute the loss
        loss = criterion(outputs, labels)

        # Backward
        loss.backward()

        # Update the weights
        optimizer.step()

        # Accumulate the loss weighted by the batch size, so the division
        # below yields a per-sample average even if the last batch is smaller
        train_loss += loss.item() * inputs.size(0)

    # Epoch training loss
    train_loss = train_loss / len(dataset_train)

    # Validation
    # ----------
    # Evaluation mode
    net.eval()

    # Running sum of the validation loss over all samples
    val_loss = 0.0

    # Gradients are not needed for validation. The context manager restores
    # the previous autograd state on exit, so autograd is not left globally
    # disabled once this cell finishes (which a bare
    # torch.set_grad_enabled(False) call would do).
    with torch.no_grad():
        for inputs, labels in dataloader_val:
            # Send inputs and labels to the selected device
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Forward only
            outputs = net(inputs)

            # Compute the loss
            loss = criterion(outputs, labels)

            # Accumulate the validation loss
            val_loss += loss.item() * inputs.size(0)

    # Epoch validation loss
    val_loss = val_loss / len(dataset_val)

    # Print the losses of this epoch
    print(f'Epoca {epoch}: Train Loss: {train_loss:.3f} \t Val. Loss: {val_loss:.3f}')

print('Treinamento finalizado')

Epoca 0: Train Loss: 1.759 	 Val. Loss: 1.571
Epoca 1: Train Loss: 1.418 	 Val. Loss: 1.360
Epoca 2: Train Loss: 1.285 	 Val. Loss: 1.244
Epoca 3: Train Loss: 1.191 	 Val. Loss: 1.222
Epoca 4: Train Loss: 1.111 	 Val. Loss: 1.173
Epoca 5: Train Loss: 1.046 	 Val. Loss: 1.146
Epoca 6: Train Loss: 0.991 	 Val. Loss: 1.161
Epoca 7: Train Loss: 0.938 	 Val. Loss: 1.202
Epoca 8: Train Loss: 0.904 	 Val. Loss: 1.172
Epoca 9: Train Loss: 0.869 	 Val. Loss: 1.157
Treinamento finalizado


## Testando o modelo
---

In [None]:
# Put the model in evaluation mode explicitly instead of relying on the state
# left behind by the training cell
net.eval()

# Number of correctly classified images and number of images seen
correct = 0
total = 0

# Since we're not training, we don't need to calculate gradients for our outputs
with torch.no_grad():
    for inputs, labels in dataloader_test:

        # Send inputs and labels to the selected device
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Calculate outputs by running images through the network
        outputs = net(inputs)

        # The class with the highest score is what we choose as prediction
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Accuracy as a fraction of the images actually evaluated
accuracy = correct / total

print(f'Acurácia: {accuracy}')

# Same accuracy as an integer percentage (floor division)
print(f'Acurácia do conjunto de testes: {100 * correct // total} %')

Acurácia: 0.6123
Acurácia do conjunto de testes: 61 %


## Bibliografia
---
* PyTorch. Training a Classifier
    * https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html
* Nathan Inkawhich. FINETUNING TORCHVISION MODELS. 
    * Disponível em: https://pytorch.org/tutorials/beginner/finetuning_torchvision_models_tutorial.html
* PyTorch. Neural Networks
    * https://pytorch.org/tutorials/beginner/blitz/neural_networks_tutorial.html