# Treinamento de uma CNN no CIFAR 10

**Nome: Diego Alysson Braga Moreira**


O objetivo deste exercício é implementar e treinar uma rede convolucional que obtenha a maior acurácia no dataset de teste do CIFAR 10.

Podem definir sua própria rede ou utilizar redes pré-treinadas, como a Resnet.

## Inicializando o Neptune

In [None]:
! pip install neptune-client==0.9.1

Collecting neptune-client==0.9.1
[?25l  Downloading https://files.pythonhosted.org/packages/b6/25/757d8828a31dba3e684ac2bbc29b5fee54ec1d5961333c5ba18fb5dcf67f/neptune-client-0.9.1.tar.gz (209kB)
[K     |████████████████████████████████| 215kB 5.2MB/s 
[?25hCollecting bravado
  Downloading https://files.pythonhosted.org/packages/21/ed/03b0c36b5bcafbe2938ed222f9a164a6c0367ce99a9d2d502e462853571d/bravado-11.0.3-py2.py3-none-any.whl
Collecting future>=0.17.1
[?25l  Downloading https://files.pythonhosted.org/packages/45/0b/38b06fd9b92dc2b68d58b75f900e97884c45bedd2ff83203d933cf5851c9/future-0.18.2.tar.gz (829kB)
[K     |████████████████████████████████| 829kB 13.9MB/s 
Collecting PyJWT
  Downloading https://files.pythonhosted.org/packages/3f/32/d5d3cab27fee7f6b22d7cd7507547ae45d52e26030fa77d1f83d0526c6e5/PyJWT-2.1.0-py3-none-any.whl
Collecting websocket-client>=0.35.0
[?25l  Downloading https://files.pythonhosted.org/packages/f7/0c/d52a2a63512a613817846d430d16a8fbe5ea56dd889e89c68facf6

In [None]:
import os
from getpass import getpass

import neptune.new as neptune

# SECURITY: never hard-code the Neptune API token in the notebook. Anyone who
# can read the file can use the account. The token that used to be committed
# here should be considered leaked and must be revoked in the Neptune UI.
#
# The token is read from the NEPTUNE_API_TOKEN environment variable; when it is
# not set, the user is prompted for it without echoing the input.
# How to obtain your Neptune API token:
# https://docs.neptune.ai/administration/security-and-privacy/how-to-find-and-set-neptune-api-token
api_token = os.environ.get('NEPTUNE_API_TOKEN') or getpass('Neptune API token: ')

run = neptune.init(project='d230640/Aula6-Exercicios', api_token=api_token)

https://app.neptune.ai/d230640/Aula6-Exercicios/e/AUL6-34


## Importação das bibliotecas

In [None]:
%matplotlib inline
import numpy as np

import torch
from torch.utils.data import DataLoader

import torchvision
from torchvision.datasets import MNIST

torch.manual_seed(123)

<torch._C.Generator at 0x7f28f5e91090>

## Download do Dataset

In [None]:
!mkdir ./data

transform = torchvision.transforms.Compose(
    [torchvision.transforms.ToTensor(),
     torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                             download=True, transform=transform)

test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                            download=True, transform=transform)


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=0.0, max=170498071.0), HTML(value='')))


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


## Dataset e dataloader

In [None]:
# Hold out part of the training data for validation: 40,000 images are kept for
# training and 10,000 are used to monitor the model after each epoch.
train_size = 40000
val_size = 10000
# NOTE(review): `train_dataset` is rebound to the 40,000-image subset, so
# re-running this cell without re-creating the dataset will presumably fail
# (the split sizes would no longer add up to the dataset length) — confirm.
train_dataset, val_dataset = torch.utils.data.random_split(train_dataset, [train_size, val_size])

In [None]:
batch_size = 50

# Only the training split is shuffled; validation and test keep a fixed order.
train_dataloader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Report how many minibatches each split yields per pass.
print('Número de minibatches de trenamento:', len(train_dataloader))
print('Número de minibatches de validação:', len(val_dataloader))
print('Número de minibatches de teste:', len(test_dataloader))

# Pull one training minibatch to sanity-check shape, value range and types.
x_train, y_train = next(iter(train_dataloader))
print("\nDimensões dos dados de um minibatch:", x_train.shape)
print("Valores mínimo e máximo dos pixels: ", x_train.min(), x_train.max())
print("Tipo dos dados das imagens:         ", type(x_train))
print("Tipo das classes das imagens:       ", type(y_train))

Número de minibatches de trenamento: 800
Número de minibatches de validação: 200
Número de minibatches de teste: 200

Dimensões dos dados de um minibatch: torch.Size([50, 3, 32, 32])
Valores mínimo e máximo dos pixels:  tensor(-1.) tensor(1.)
Tipo dos dados das imagens:          <class 'torch.Tensor'>
Tipo das classes das imagens:        <class 'torch.Tensor'>


In [None]:
# Set to True to use the original PyTorch Lightning package instead of the
# simplified LightningModule / Trainer classes defined in this notebook.
pl_original = False

## Usando o PyTorch Lightning "SuperLight" (criado apenas para o curso)

Criamos um PyTorch Lightning "básico" que esperamos ser mais didático que o original, pois o código é fácil de entender caso ocorra algum erro.

As classes `LightningModule` e `Trainer` não precisam ser implementadas. Entretanto, para cada nova tarefa, uma classe que herda do `LightningModule` precisa ser definida e os seguintes métodos devem ser implementados:

 - \_\_init\_\_
 - forward
 - training_step
 - training_epoch_end
 - validation_step
 - validation_epoch_end
 - configure_optimizers

Os métodos `test_step` e `test_epoch_end` devem ser implementados apenas se trainer.test() for chamado.


In [None]:
import abc


class LightningModule:
    """Minimal stand-in for ``pytorch_lightning.LightningModule``.

    The class only documents the hooks that the simplified ``Trainer`` calls.
    Subclasses are expected to override them; the defaults here do nothing and
    return ``None``.

    The ``abc.abstractmethod`` decorators are purely informative: the class
    does not use ``abc.ABCMeta``, so nothing is enforced at instantiation time.
    This keeps ``test_step`` / ``test_epoch_end`` optional for subclasses.
    """

    # Previously spelled ``__init``, which Python name-mangles to
    # ``_LightningModule__init`` and never runs as the constructor.
    @abc.abstractmethod
    def __init__(self):
        return

    @abc.abstractmethod
    def forward(self):
        """Run the wrapped network on an input batch."""
        return

    @abc.abstractmethod
    def training_step(self):
        """Compute the training loss for one minibatch."""
        return

    @abc.abstractmethod
    def training_epoch_end(self):
        """Aggregate the per-batch training outputs of one epoch."""
        return

    @abc.abstractmethod
    def validation_step(self):
        """Compute validation outputs for one minibatch."""
        return

    @abc.abstractmethod
    def validation_epoch_end(self):
        """Aggregate the per-batch validation outputs of one epoch."""
        return

    @abc.abstractmethod
    def test_step(self):
        """Compute test outputs for one minibatch (only needed for ``Trainer.test``)."""
        return

    @abc.abstractmethod
    def test_epoch_end(self):
        """Aggregate the per-batch test outputs (only needed for ``Trainer.test``)."""
        return

    @abc.abstractmethod
    def configure_optimizers(self):
        """Return the optimizers and schedulers used for training."""
        return

In [None]:
class Trainer():
    """Minimal training / evaluation loop mirroring a subset of PyTorch Lightning's ``Trainer``.

    The model passed to ``fit`` / ``validate`` / ``test`` must expose the
    wrapped ``torch.nn.Module`` as ``model.model`` and implement the matching
    ``*_step`` / ``*_epoch_end`` hooks.
    """

    def __init__(self, max_epochs: int, gpus: int = 1):
        """Store the epoch budget and select the device.

        Args:
            max_epochs: number of passes over the training dataloader.
            gpus: when greater than zero and CUDA is available, ``cuda:0`` is
                used; otherwise everything runs on the CPU.
        """
        self.max_epochs = max_epochs
        dev = "cuda:0" if gpus > 0 and torch.cuda.is_available() else "cpu"

        print(f'Using {dev}')
        self.device = torch.device(dev)

    @staticmethod
    def _detach(output_dict):
        """Return a copy of a step output with every tensor detached from the graph."""
        return {key: (value.detach() if torch.is_tensor(value) else value)
                for key, value in output_dict.items()}

    def _run_eval(self, model, dataloader, step_fn):
        """Run ``step_fn`` over ``dataloader`` in eval mode without gradients."""
        # Evaluation may be requested before fit() or from another Trainer
        # instance, so make sure the weights live on this trainer's device.
        model.model.to(self.device)
        model.model.eval()
        outputs = []
        with torch.no_grad():
            for batch_idx, (x, y) in enumerate(dataloader):
                batch = (x.to(self.device), y.to(self.device))
                outputs.append(step_fn(batch, batch_idx))
        return outputs

    def fit(self, model, train_dataloader, val_dataloader=None):
        """Train ``model`` for ``max_epochs`` epochs.

        After every epoch the model is validated (when ``val_dataloader`` is
        given) and the weights with the lowest validation loss so far are
        written to ``best_model.pt``.
        """
        assert isinstance(model, LightningModule)
        best_valid_loss = float('inf')
        # Only the first optimizer is used; the scheduler is ignored.
        optimizers, _ = model.configure_optimizers()
        optimizer = optimizers[0]
        model.model.to(self.device)

        for i in range(self.max_epochs):
            outputs = []
            model.model.train()
            for batch_idx, (x_train, y_train) in enumerate(train_dataloader):
                x_train = x_train.to(self.device)
                y_train = y_train.to(self.device)
                output_dict = model.training_step((x_train, y_train), batch_idx)
                loss = output_dict['loss']
                # Zero the gradients, backpropagate, then take a gradient step.
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                # Keep only detached values so the autograd graph of every
                # batch is not kept alive until the end of the epoch.
                outputs.append(self._detach(output_dict))

            model.training_epoch_end(outputs=outputs)

            # Validation loop, once per epoch.
            if val_dataloader:
                output_val_end = self.validate(model, val_dataloader)
                print(f'Epoch {i} - {output_val_end["progress_bar"]}')
                # Save the best model according to the validation loss.
                if output_val_end['valid_loss'] < best_valid_loss:
                    torch.save(model.model.state_dict(), 'best_model.pt')
                    best_valid_loss = output_val_end['valid_loss']

    def validate(self, model, val_dataloader):
        """Evaluate ``model`` on ``val_dataloader`` and return ``validation_epoch_end``'s result."""
        outputs = self._run_eval(model, val_dataloader, model.validation_step)
        return model.validation_epoch_end(outputs=outputs)

    def test(self, model, test_dataloader):
        """Evaluate ``model`` on ``test_dataloader`` and return ``test_epoch_end``'s result."""
        outputs = self._run_eval(model, test_dataloader, model.test_step)
        return model.test_epoch_end(outputs=outputs)

In [None]:
# Optionally install and import the real PyTorch Lightning. The import rebinds
# the names LightningModule and Trainer, replacing the simplified classes
# defined earlier in this notebook.
if pl_original:
    !pip install pytorch_lightning
    from pytorch_lightning import LightningModule, Trainer

## Definindo o Modelo

In [None]:
class Modelo(torch.nn.Module):
    """CNN classifier: four conv / batch-norm / ReLU stages followed by an MLP head.

    The spatial sizes in the comments assume 32x32 inputs; the head expects a
    256 x 2 x 2 feature map and produces 10 logits.
    """

    def __init__(self):
        super().__init__()
        nn = torch.nn

        def conv_stage(in_channels, out_channels, pool):
            """3x3 conv -> batch norm -> ReLU, optionally followed by 2x2 max pooling."""
            stage = [
                nn.Conv2d(in_channels, out_channels, 3),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
            ]
            if pool:
                stage.append(nn.MaxPool2d((2, 2)))
            return stage

        # Layers are created in the same order as a flat Sequential listing,
        # so parameter names and initialisation order are unaffected.
        feature_layers = []
        feature_layers += conv_stage(3, 32, pool=False)     # 32 -> 30
        feature_layers += conv_stage(32, 64, pool=True)     # 30 -> 28 -> 14
        feature_layers += conv_stage(64, 128, pool=True)    # 14 -> 12 -> 6
        feature_layers += conv_stage(128, 256, pool=True)   # 6 -> 4 -> 2
        self.conv_layer = nn.Sequential(*feature_layers)

        self.dense = nn.Sequential(
            nn.Dropout(0.1),
            nn.Linear(2 * 2 * 256, 1000),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(1000, 500),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(500, 10),
        )

    def forward(self, x):
        """Return the class logits for a batch of images."""
        feature_maps = self.conv_layer(x)
        flattened = feature_maps.view(-1, 2 * 2 * 256)
        return self.dense(flattened)

In [None]:
# class Modelo(torch.nn.Module):
#     def __init__(self):
#         super(Modelo, self).__init__()

#         # Defina seu modelo aqui.
#         self.conv_layer1 = torch.nn.Sequential(
#             torch.nn.Conv2d(3,32,3), #- 30
#             torch.nn.BatchNorm2d(32),            
#             torch.nn.ReLU(), 

#             torch.nn.Conv2d(32,64,3), #- 28
#             torch.nn.BatchNorm2d(64),
#             torch.nn.ReLU(),
#             torch.nn.MaxPool2d((2,2)), #- 14

#             torch.nn.Conv2d(64,128,5), #- 10
#             torch.nn.BatchNorm2d(128),
#             torch.nn.ReLU(),
#         )

#         self.conv_layer2 = torch.nn.Sequential(
#             torch.nn.Conv2d(3,128,3), #- 30
#             torch.nn.BatchNorm2d(128),            
#             torch.nn.ReLU(), 

#             torch.nn.Conv2d(128,64,3), #- 28
#             torch.nn.BatchNorm2d(64),
#             torch.nn.ReLU(),
#             torch.nn.MaxPool2d((2,2)), #- 14

#             torch.nn.Conv2d(64,32,5), #- 10
#             torch.nn.BatchNorm2d(32),
#             torch.nn.ReLU(),
#         )

#         self.dense1 = torch.nn.Sequential(
#             torch.nn.Dropout(0.2),
#             torch.nn.Linear(10*10*128,128),
#             torch.nn.ReLU(),
#             torch.nn.Dropout(0.1),
#             torch.nn.Linear(128,10),
#         )

#         self.dense2 = torch.nn.Sequential(
#             torch.nn.Dropout(0.2),
#             torch.nn.Linear(10*10*32,32),
#             torch.nn.ReLU(),
#             torch.nn.Dropout(0.1),
#             torch.nn.Linear(32,10),
#         )

#         self.dense3 = torch.nn.Sequential(
#             torch.nn.Dropout(0.2),
#             torch.nn.ReLU(),
#             torch.nn.Linear(20,10),
#         )
    
#     def forward(self, x):
#         x_conv1 = self.conv_layer1(x)
#         x_conv2 = self.conv_layer2(x)
#         x_conv1 = x_conv1.view(-1,10*10*128)
#         x_conv2 = x_conv2.view(-1,10*10*32)
#         x1 = self.dense1(x_conv1)
#         x2 = self.dense2(x_conv2)

#         x = torch.cat((x1, x2), 1)
#         x = self.dense3(x)
#         return x

## Criação do modelo Pytorch Lightning

In [None]:
class LightningClassifier(LightningModule):
    """Classification task wrapper around ``Modelo``.

    Implements the hooks used by the trainer: per-batch loss computation, and
    per-epoch aggregation of loss and accuracy, logged to the Neptune ``run``.
    """

    def __init__(self, hparams):
        """Build the network and the loss.

        Args:
            hparams: mapping that must contain ``'learning_rate'``.
        """
        super().__init__()

        self.hparams = hparams
        # reduction='none' keeps one loss per sample, so epoch averages are
        # exact even when batches have different sizes.
        self.criterion = torch.nn.CrossEntropyLoss(reduction='none')

        self.model = Modelo()

    def forward(self, x):
        """Return ``(logits, predicted_class)`` for a batch of inputs."""
        logits = self.model(x)
        preds = logits.argmax(dim=1)
        return logits, preds

    def _eval_step(self, batch):
        """Shared validation/test step: per-sample losses and correctness flags."""
        x, y = batch
        logits, preds = self.forward(x)
        batch_losses = self.criterion(logits, y)
        batch_accuracy = (preds == y)
        return {'batch_losses': batch_losses, 'batch_accuracy': batch_accuracy}

    @staticmethod
    def _epoch_metrics(outputs):
        """Return ``(mean loss, accuracy)`` as floats over all samples in ``outputs``."""
        # torch.cat (rather than torch.stack) also works when the last batch
        # is smaller than the others.
        avg_loss = torch.cat([output['batch_losses'] for output in outputs]).mean()
        accuracy = torch.cat([output['batch_accuracy'] for output in outputs]).float().mean()
        return avg_loss.item(), accuracy.item()

    def training_step(self, train_batch, batch_idx):
        """Compute the mean loss of one training minibatch and log it."""
        x, y = train_batch
        logits = self.model(x)

        batch_losses = self.criterion(logits, y)
        loss = batch_losses.mean()
        # Log a plain float, not a graph-attached tensor.
        run['train/batch_loss'].log(loss.item())

        # The trainer expects the value to backpropagate under the key 'loss'.
        # The per-sample losses are only used for reporting, so they are detached.
        return {'loss': loss, 'batch_losses': batch_losses.detach()}

    def training_epoch_end(self, outputs):
        """Log the mean per-sample training loss of the epoch."""
        avg_loss = torch.cat([output['batch_losses'] for output in outputs]).mean()

        run['train/loss'].log(avg_loss.item())
        return

    def validation_step(self, val_batch, batch_idx):
        """Return per-sample losses and correctness flags for one validation batch."""
        return self._eval_step(val_batch)

    def validation_epoch_end(self, outputs):
        """Aggregate, log and return the validation loss and accuracy."""
        avg_loss, accuracy = self._epoch_metrics(outputs)

        run['valid/loss'].log(avg_loss)
        # NOTE: the misspelled channel name is kept on purpose so existing
        # Neptune charts keep receiving data.
        run['valid/acuracy'].log(accuracy)

        metrics = {'valid_loss': avg_loss, 'accuracy': accuracy}
        output = {'progress_bar': metrics, 'valid_loss': avg_loss}
        return output

    def test_step(self, val_batch, batch_idx):
        """Return per-sample losses and correctness flags for one test batch."""
        # Implementing this method is optional in PyTorch Lightning.
        return self._eval_step(val_batch)

    def test_epoch_end(self, outputs):
        """Aggregate, log and return the test loss and accuracy."""
        # Implementing this method is optional in PyTorch Lightning.
        avg_loss, accuracy = self._epoch_metrics(outputs)

        # Test metrics get their own channels; they used to be appended to the
        # validation series by mistake.
        run['test/loss'].log(avg_loss)
        run['test/accuracy'].log(accuracy)
        metrics = {'Test loss': avg_loss, 'test accuracy': accuracy}
        output = {'progress_bar': metrics}
        return output

    def configure_optimizers(self):
        """Return plain SGD plus a constant-factor scheduler."""
        optimizer = torch.optim.SGD(self.model.parameters(), lr=self.hparams['learning_rate'])
        # Dummy scheduler (multiplies the learning rate by 1.0); the original
        # PyTorch Lightning requires one.
        scheduler = torch.optim.lr_scheduler.MultiplicativeLR(optimizer, lr_lambda=lambda epoch: 1.0)
        return [optimizer], [scheduler]  # Return form that pairs an optimizer with a scheduler.

## Inicialização dos parâmetros

In [None]:
# Hyperparameters for the from-scratch CNN run: number of epochs and the
# learning rate handed to the optimizer.
hparams = dict(max_epochs=100, learning_rate=0.1)

## Treinamento

In [None]:
# Build the classifier around the currently defined `Modelo`, then train it
# for hparams['max_epochs'] epochs, validating after every epoch.
pl_model = LightningClassifier(hparams=hparams)
trainer = Trainer(max_epochs=hparams['max_epochs'])
trainer.fit(pl_model, train_dataloader, val_dataloader)

Using cuda:0
Epoch 0 - {'valid_loss': 1.0047228336334229, 'accuracy': 0.6502000093460083}
Epoch 1 - {'valid_loss': 0.8556151390075684, 'accuracy': 0.6992999911308289}
Epoch 2 - {'valid_loss': 0.7860113978385925, 'accuracy': 0.7202999591827393}
Epoch 3 - {'valid_loss': 0.7757797837257385, 'accuracy': 0.7276999950408936}
Epoch 4 - {'valid_loss': 0.7304831743240356, 'accuracy': 0.7550999522209167}
Epoch 5 - {'valid_loss': 0.7685660123825073, 'accuracy': 0.7444999814033508}
Epoch 6 - {'valid_loss': 0.6775156855583191, 'accuracy': 0.7792999744415283}
Epoch 7 - {'valid_loss': 0.8972641229629517, 'accuracy': 0.7428999543190002}
Epoch 8 - {'valid_loss': 0.6528781652450562, 'accuracy': 0.7910000085830688}
Epoch 9 - {'valid_loss': 0.6629108190536499, 'accuracy': 0.7980999946594238}
Epoch 10 - {'valid_loss': 0.7468494176864624, 'accuracy': 0.7807999849319458}
Epoch 11 - {'valid_loss': 0.6776213645935059, 'accuracy': 0.8016999959945679}
Epoch 12 - {'valid_loss': 0.7498254776000977, 'accuracy': 0.7

## Teste

In [None]:
# Evaluate on the test split.
# NOTE(review): this uses the weights left after the last epoch, not the
# 'best_model.pt' checkpoint that Trainer.fit saves — confirm this is intended.
trainer.test(pl_model, test_dataloader)

{'progress_bar': {'Test loss': 1.126685380935669,
  'test accuracy': 0.8297999501228333}}

# Modelo com Transfer Learning, ResNet50

In [None]:
# Re-create the datasets with the preprocessing used for the pretrained ResNet:
# images are upscaled to 224x224 and normalised per channel.
# NOTE(review): the mean/std values are presumably the ImageNet statistics the
# pretrained weights were trained with — confirm against the torchvision docs.

transform = torchvision.transforms.Compose(
    [torchvision.transforms.ToTensor(),
     torchvision.transforms.Resize((224, 224)),
     torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ])


# download=False: the files are expected to be in ./data already.
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                             download=False, transform=transform)

test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                            download=False, transform=transform)


In [None]:
# Same 40,000 / 10,000 train/validation split as before, applied to the
# re-created (resized) training dataset.
train_size = 40000
val_size = 10000
# NOTE(review): the split is drawn from the global RNG state at this point, so
# the validation images are presumably not the same ones used for the first
# model — confirm if the runs need to be directly comparable.
train_dataset, val_dataset = torch.utils.data.random_split(train_dataset, [train_size, val_size])

In [None]:
batch_size = 50

# Only the training split is shuffled; validation and test keep a fixed order.
train_dataloader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Report how many minibatches each split yields per pass.
print('Número de minibatches de trenamento:', len(train_dataloader))
print('Número de minibatches de validação:', len(val_dataloader))
print('Número de minibatches de teste:', len(test_dataloader))

# Pull one training minibatch to sanity-check the resized inputs.
x_train, y_train = next(iter(train_dataloader))
print("\nDimensões dos dados de um minibatch:", x_train.shape)
print("Valores mínimo e máximo dos pixels: ", x_train.min(), x_train.max())
print("Tipo dos dados das imagens:         ", type(x_train))
print("Tipo das classes das imagens:       ", type(y_train))

Número de minibatches de trenamento: 800
Número de minibatches de validação: 200
Número de minibatches de teste: 200

Dimensões dos dados de um minibatch: torch.Size([50, 3, 224, 224])
Valores mínimo e máximo dos pixels:  tensor(-2.1179) tensor(2.6400)
Tipo dos dados das imagens:          <class 'torch.Tensor'>
Tipo das classes das imagens:        <class 'torch.Tensor'>


In [None]:
class Modelo(torch.nn.Module):
    """Pretrained ResNet-50 whose final layer is replaced by a 10-class head."""

    def __init__(self):
        super().__init__()

        self.model = torch.hub.load('pytorch/vision:v0.9.0', 'resnet50', pretrained=True)
        self._replace_head(self.model, num_classes=10)

    @staticmethod
    def _replace_head(backbone, num_classes):
        """Swap ``backbone.fc`` for a new linear layer with ``num_classes`` outputs.

        Assigning ``fc.out_features = 10`` only changes an attribute: the weight
        matrix keeps its original shape and the network still emits 1000 logits.
        A new ``Linear`` layer must be created instead.
        """
        backbone.fc = torch.nn.Linear(backbone.fc.in_features, num_classes)
        return backbone

    def forward(self, x):
        """Return the 10 class logits for a batch of images."""
        return self.model(x)

In [None]:
# Hyperparameters for the pretrained-ResNet runs: a short epoch budget and the
# learning rate handed to the optimizer.
hparams = dict(max_epochs=8, learning_rate=0.1)

In [None]:
# LightningClassifier instantiates whichever `Modelo` class is currently
# defined, so this trains the ResNet-50 based model declared just above.
pl_model2 = LightningClassifier(hparams=hparams)
trainer2 = Trainer(max_epochs=hparams['max_epochs'])
trainer2.fit(pl_model2, train_dataloader, val_dataloader)

Downloading: "https://github.com/pytorch/vision/archive/v0.9.0.zip" to /root/.cache/torch/hub/v0.9.0.zip
Downloading: "https://download.pytorch.org/models/resnet50-19c8e357.pth" to /root/.cache/torch/hub/checkpoints/resnet50-19c8e357.pth


HBox(children=(FloatProgress(value=0.0, max=102502400.0), HTML(value='')))


Using cuda:0
Epoch 0 - {'valid_loss': 0.6489839553833008, 'accuracy': 0.8458999991416931}
Epoch 1 - {'valid_loss': 0.3159894347190857, 'accuracy': 0.896399974822998}
Epoch 2 - {'valid_loss': 0.3112369775772095, 'accuracy': 0.9025999903678894}
Epoch 3 - {'valid_loss': 0.25502705574035645, 'accuracy': 0.9220999479293823}
Epoch 4 - {'valid_loss': 0.2431498020887375, 'accuracy': 0.9317999482154846}
Epoch 5 - {'valid_loss': 0.38828393816947937, 'accuracy': 0.8996999859809875}
Epoch 6 - {'valid_loss': 0.23032362759113312, 'accuracy': 0.9321999549865723}
Epoch 7 - {'valid_loss': 0.239551842212677, 'accuracy': 0.9328999519348145}


In [None]:
# Evaluate with the trainer that fitted this model (trainer2), not the one
# created for the first experiment.
trainer2.test(pl_model2, test_dataloader)

{'progress_bar': {'Test loss': 0.2328687161207199,
  'test accuracy': 0.9355999827384949}}

# Verificando e testando Fine Tuning 

In [None]:
from torchvision import datasets, models, transforms
# When True, the pretrained network's parameters are frozen (requires_grad set
# to False), so only layers added afterwards are trained.
feature_extract = True

In [None]:
def set_parameter_requires_grad(model, feature_extracting):
    """Freeze every parameter of ``model`` when ``feature_extracting`` is truthy.

    Nothing is changed (and nothing is unfrozen) when the flag is falsy.
    """
    if not feature_extracting:
        return
    for weight in model.parameters():
        weight.requires_grad = False

In [None]:
class Modelo(torch.nn.Module):
    """Pretrained ResNet-50 used as a feature extractor, plus a trainable MLP head.

    The backbone keeps its original 1000-way output layer; when the global
    ``feature_extract`` flag is true, all backbone parameters are frozen. The
    ``dense`` head maps the 1000 backbone outputs to 10 class logits.

    NOTE(review): the trainer puts the whole module in train mode, so the
    frozen backbone's batch-norm layers presumably still update their running
    statistics — confirm whether the backbone should be kept in eval mode.
    """

    def __init__(self):
        super().__init__()

        self.model = torch.hub.load('pytorch/vision:v0.9.0', 'resnet50', pretrained=True)
        set_parameter_requires_grad(self.model, feature_extract)
        # The former `self.model.fc.out_features = 1000` was removed: it only
        # rewrote an attribute with the value it already had.

        # Created after the freeze, so these layers remain trainable.
        self.dense = torch.nn.Sequential(
            torch.nn.ReLU(),
            torch.nn.Dropout(0.1),
            torch.nn.Linear(1000, 500),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.1),
            torch.nn.Linear(500, 100),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.1),
            torch.nn.Linear(100, 10),
        )

    def forward(self, x):
        """Return the 10 class logits for a batch of images."""
        backbone_out = self.model(x)
        return self.dense(backbone_out)

In [None]:
# Train the frozen-backbone model. `hparams` is not redefined here, so the
# values set for the previous ResNet run are reused.
pl_model3 = LightningClassifier(hparams=hparams)
trainer3 = Trainer(max_epochs=hparams['max_epochs'])
trainer3.fit(pl_model3, train_dataloader, val_dataloader)

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.9.0


Using cuda:0
Epoch 0 - {'valid_loss': 0.7338544726371765, 'accuracy': 0.7488999962806702}
Epoch 1 - {'valid_loss': 0.6471253037452698, 'accuracy': 0.7833999991416931}
Epoch 2 - {'valid_loss': 0.6301283836364746, 'accuracy': 0.7879999876022339}
Epoch 3 - {'valid_loss': 0.6204351186752319, 'accuracy': 0.7854999899864197}
Epoch 4 - {'valid_loss': 0.605130672454834, 'accuracy': 0.7902999520301819}
Epoch 5 - {'valid_loss': 0.5859749913215637, 'accuracy': 0.8014999628067017}
Epoch 6 - {'valid_loss': 0.5832421779632568, 'accuracy': 0.800599992275238}
Epoch 7 - {'valid_loss': 0.5917224287986755, 'accuracy': 0.7960999608039856}
