# Treinamento de uma CNN no CIFAR 10

**Nome:** Vinicius Rodrigues Sanches


O objetivo deste exercício é implementar e treinar uma rede convolucional que obtenha a maior acurácia no dataset de teste do CIFAR 10.

Podem definir sua própria rede ou utilizar redes pré-treinadas, como a Resnet.

## Inicializando o Neptune

In [None]:
# Install a pinned version of the Neptune client; the `neptune.new` API imported in the next cell comes from this package.
! pip install neptune-client==0.9.1



In [None]:
import os
from getpass import getpass

import neptune.new as neptune

# Never hard-code the Neptune API token in the notebook: anyone who can read
# the file can then write to the project. A token that was ever committed
# should be treated as leaked and revoked in the Neptune account settings.
#
# The token is read from the NEPTUNE_API_TOKEN environment variable when it
# is set; otherwise it is requested interactively without being echoed.
# How to obtain your Neptune API token:
# https://docs.neptune.ai/administration/security-and-privacy/how-to-find-and-set-neptune-api-token
api_token = os.environ.get('NEPTUNE_API_TOKEN') or getpass('Neptune API token: ')

# `run` is the handle the training code logs its metrics to.
run = neptune.init(project='vinicius-sanches/Aula6', api_token=api_token)

https://app.neptune.ai/vinicius-sanches/Aula6/e/AUL1-75


## Importação das bibliotecas

In [None]:
%matplotlib inline
import numpy as np

import torch
from torch.utils.data import DataLoader

import torchvision
from torchvision.datasets import MNIST
from torchvision.models import resnet50

# Seed PyTorch's global random number generator with a fixed value
# (presumably so that repeated runs of the notebook are comparable).
torch.manual_seed(123)

<torch._C.Generator at 0x7f7fcb82c090>

## Download do Dataset

In [None]:
import os

# Create the download directory. `exist_ok=True` avoids the "File exists"
# failure that a plain `mkdir` reports when this cell is run a second time.
os.makedirs('./data', exist_ok=True)

# Per-channel mean / std used for normalisation. These are the ImageNet
# statistics commonly used with torchvision's pretrained models.
normalize = torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225])

# Convert each image to a tensor, resize it to 224x224 and normalise it.
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Resize((224, 224)),
    normalize,
])

# Official CIFAR-10 training split (divided into train/validation later on).
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                             download=True, transform=transform)

# Official CIFAR-10 test split.
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                            download=True, transform=transform)


mkdir: cannot create directory ‘./data’: File exists
Files already downloaded and verified
Files already downloaded and verified


## Dataset e dataloader

In [None]:
# Hold out part of the training split for validation. The two sizes are
# passed to random_split as the lengths of the resulting subsets.
train_size, val_size = 40000, 10000
train_dataset, val_dataset = torch.utils.data.random_split(
    train_dataset,
    lengths=[train_size, val_size],
)

In [None]:
batch_size = 500

# Only the training loader is shuffled; validation and test are not.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader, test_dataloader = (
    DataLoader(split, batch_size=batch_size, shuffle=False)
    for split in (val_dataset, test_dataset)
)

# Report how many mini-batches each loader yields.
print('Número de minibatches de trenamento:', len(train_dataloader))
print('Número de minibatches de validação:', len(val_dataloader))
print('Número de minibatches de teste:', len(test_dataloader))

# Pull one training batch and inspect its shape, value range and types.
x_train, y_train = next(iter(train_dataloader))
print("\nDimensões dos dados de um minibatch:", x_train.shape)
print("Valores mínimo e máximo dos pixels: ", x_train.min(), x_train.max())
print("Tipo dos dados das imagens:         ", type(x_train))
print("Tipo das classes das imagens:       ", type(y_train))

Número de minibatches de trenamento: 80
Número de minibatches de validação: 20
Número de minibatches de teste: 20

Dimensões dos dados de um minibatch: torch.Size([500, 3, 224, 224])
Valores mínimo e máximo dos pixels:  tensor(-2.1179) tensor(2.6400)
Tipo dos dados das imagens:          <class 'torch.Tensor'>
Tipo das classes das imagens:        <class 'torch.Tensor'>


In [None]:
# Set to True to use the original PyTorch Lightning package instead of the
# simplified LightningModule / Trainer classes defined in this notebook.
pl_original = False

## Usando o PyTorch Lightning "SuperLight" (criado apenas para o curso).

Criamos um PyTorch Lightning "básico" que esperamos ser mais didático que o original, pois o código é fácil de entender caso ocorra algum erro.

As classes `LightningModule` e `Trainer` não precisam ser implementadas. Entretanto, para cada nova tarefa, uma classe que herda do `LightningModule` precisa ser definida e os seguintes métodos devem ser implementados:

 - \_\_init\_\_
 - forward
 - training_step
 - training_epoch_end
 - validation_step
 - validation_epoch_end
 - configure_optimizers

Os métodos `test_step` e `test_epoch_end` devem ser implementados apenas se trainer.test() for chamado.


In [None]:
import abc


class LightningModule:
    """Minimal base class listing the hooks a task-specific model provides.

    Every method here is a placeholder that returns ``None``; subclasses
    override the hooks they need with their own signatures.

    The ``abc.abstractmethod`` decorators only mark the hooks as meant to be
    overridden. The class does not use ``abc.ABCMeta``, so nothing is
    enforced and a subclass may leave hooks (for example the test ones)
    unimplemented.
    """

    @abc.abstractmethod
    def __init__(self):
        # Previously spelled `__init`, which Python name-mangles into an
        # unrelated private method instead of defining the constructor.
        return

    @abc.abstractmethod
    def forward(self):
        """Run the model on an input (placeholder)."""
        return

    @abc.abstractmethod
    def training_step(self):
        """Process one training batch (placeholder)."""
        return

    @abc.abstractmethod
    def training_epoch_end(self):
        """Aggregate the per-batch training outputs of an epoch (placeholder)."""
        return

    @abc.abstractmethod
    def validation_step(self):
        """Process one validation batch (placeholder)."""
        return

    @abc.abstractmethod
    def validation_epoch_end(self):
        """Aggregate the per-batch validation outputs (placeholder)."""
        return

    @abc.abstractmethod
    def test_step(self):
        """Process one test batch (placeholder)."""
        return

    @abc.abstractmethod
    def test_epoch_end(self):
        """Aggregate the per-batch test outputs (placeholder)."""
        return

    @abc.abstractmethod
    def configure_optimizers(self):
        """Build the optimizer(s) and scheduler(s) (placeholder)."""
        return

In [None]:
class Trainer():
    """Small training / evaluation driver for a ``LightningModule``.

    The model passed to ``fit``, ``validate`` and ``test`` must expose the
    wrapped network as ``model.model`` and implement the corresponding
    ``*_step`` / ``*_epoch_end`` hooks.
    """

    def __init__(self, max_epochs: int, gpus: int = 1):
        """Store the epoch budget and pick the device.

        Args:
            max_epochs: number of passes over the training dataloader.
            gpus: when greater than zero and CUDA is available, "cuda:0" is
                used; otherwise the CPU is used.
        """
        self.max_epochs = max_epochs
        use_cuda = gpus > 0 and torch.cuda.is_available()
        dev = "cuda:0" if use_cuda else "cpu"

        print(f'Using {dev}')
        self.device = torch.device(dev)

    def fit(self, model, train_dataloader, val_dataloader=None):
        """Train ``model`` and optionally validate after every epoch.

        Whenever the validation loss improves, the state dict of
        ``model.model`` is written to 'best_model.pt'.

        Args:
            model: a ``LightningModule`` instance.
            train_dataloader: iterable of ``(x, y)`` training batches.
            val_dataloader: optional iterable of ``(x, y)`` validation
                batches; validation is skipped when it is None or falsy.
        """
        assert isinstance(model, LightningModule)
        # Start from +inf so the first validated epoch is always saved,
        # whatever the scale of the loss.
        best_valid_loss = float('inf')
        # Only the first optimizer is used; the returned schedulers are ignored.
        optimizers, _ = model.configure_optimizers()
        optimizer = optimizers[0]
        model.model.to(self.device)

        for epoch in range(self.max_epochs):
            outputs = []
            model.model.train()
            for batch_idx, (x_train, y_train) in enumerate(train_dataloader):
                x_train = x_train.to(self.device)
                y_train = y_train.to(self.device)
                output_dict = model.training_step((x_train, y_train), batch_idx)
                loss = output_dict['loss']
                # Clear old gradients, backpropagate, take one optimizer step.
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                outputs.append(output_dict)

            model.training_epoch_end(outputs=outputs)

            # Validation loop, once per epoch.
            if val_dataloader:
                output_val_end = self.validate(model, val_dataloader)
                print(f'Epoch {epoch} - {output_val_end["progress_bar"]}')
                # Keep the weights with the lowest validation loss seen so far.
                if output_val_end['valid_loss'] < best_valid_loss:
                    torch.save(model.model.state_dict(), 'best_model.pt')
                    best_valid_loss = output_val_end['valid_loss']

    def _run_eval(self, model, dataloader, step_fn, epoch_end_fn):
        """Run ``step_fn`` on every batch without gradients and aggregate."""
        outputs = []
        # Move the network as well as the batches, so evaluation also works
        # when it is called without a preceding fit().
        model.model.to(self.device)
        model.model.eval()
        with torch.no_grad():
            for batch_idx, (x, y) in enumerate(dataloader):
                x = x.to(self.device)
                y = y.to(self.device)
                outputs.append(step_fn((x, y), batch_idx))

        return epoch_end_fn(outputs=outputs)

    def validate(self, model, val_dataloader):
        """Evaluate on ``val_dataloader``; return ``validation_epoch_end``'s result."""
        return self._run_eval(
            model, val_dataloader, model.validation_step, model.validation_epoch_end
        )

    def test(self, model, test_dataloader):
        """Evaluate on ``test_dataloader``; return ``test_epoch_end``'s result."""
        return self._run_eval(
            model, test_dataloader, model.test_step, model.test_epoch_end
        )

In [None]:
# When `pl_original` is set, install PyTorch Lightning and rebind the names
# LightningModule / Trainer to the library's classes, replacing the
# simplified versions defined earlier in the notebook.
if pl_original:
    ! pip install pytorch_lightning
    from pytorch_lightning import LightningModule, Trainer

## Definindo o Modelo

In [None]:
class Modelo(torch.nn.Module):
    """ResNet-50 with its final ``fc`` layer replaced by a small MLP head.

    The network is created with ``pretrained=True`` and all of its
    parameters get ``requires_grad = False``. The replacement head
    (2048 -> 1024 -> 128 -> 10) is attached afterwards, so its parameters
    keep their default ``requires_grad``.
    """

    def __init__(self):
        super().__init__()

        backbone = resnet50(pretrained=True)
        # Freeze every parameter that exists before the new head is attached.
        for frozen in backbone.parameters():
            frozen.requires_grad = False

        nn = torch.nn
        head_layers = [
            nn.Linear(2048, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 10),
        ]
        backbone.fc = nn.Sequential(*head_layers)

        self.model = backbone

    def forward(self, x):
        """Return the wrapped network's output for ``x``."""
        return self.model(x)

## Criação do modelo Pytorch Lightning

In [None]:
class LightningClassifier(LightningModule):
    """Classification task wrapping ``Modelo``, logging metrics to Neptune.

    Metrics are written to the module-level Neptune ``run`` object.
    """

    def __init__(self, hparams):
        """Store the hyperparameters and build the loss and the network.

        Args:
            hparams: mapping that must contain 'learning_rate' (read in
                ``configure_optimizers``).
        """
        super().__init__()

        self.hparams = hparams
        # reduction='none' keeps one loss per sample, so epoch averages can
        # be taken over samples rather than over batches.
        self.criterion = torch.nn.CrossEntropyLoss(reduction='none')
        self.model = Modelo()

    def forward(self, x):
        """Return ``(logits, preds)``, where preds is the argmax over dim 1."""
        logits = self.model(x)
        preds = logits.argmax(dim=1)
        return logits, preds

    def training_step(self, train_batch, batch_idx):
        """Compute the loss of one training batch and log it.

        Returns:
            dict with 'loss' (scalar used for backpropagation) and
            'batch_losses' (per-sample losses, detached from the graph).
        """
        x, y = train_batch
        logits = self.model(x)

        batch_losses = self.criterion(logits, y)
        loss = batch_losses.mean()
        run['train/batch_loss'].log(loss.item())

        # The trainer expects an entry named 'loss'. The per-sample losses
        # are detached so that the list of outputs kept until the end of the
        # epoch does not hold on to every batch's autograd graph.
        return {'loss': loss, 'batch_losses': batch_losses.detach()}

    def training_epoch_end(self, outputs):
        """Log the mean per-sample training loss of the epoch."""
        # cat (not stack) so a smaller final batch does not break the
        # aggregation; the mean is still taken over all samples.
        avg_loss = torch.cat([output['batch_losses'] for output in outputs]).mean()

        run['train/loss'].log(avg_loss.item())
        return

    def _eval_step(self, batch):
        """Shared body of validation_step / test_step."""
        x, y = batch
        logits, preds = self.forward(x)

        batch_losses = self.criterion(logits, y)
        batch_accuracy = (preds == y)

        # Per-sample values are returned so the epoch-end hook can average
        # them over the whole dataset.
        return {'batch_losses': batch_losses, 'batch_accuracy': batch_accuracy}

    @staticmethod
    def _epoch_metrics(outputs):
        """Return ``(mean loss, mean accuracy)`` as floats over all samples."""
        avg_loss = torch.cat([output['batch_losses'] for output in outputs]).mean()
        accuracy = torch.cat([output['batch_accuracy'] for output in outputs]).float().mean()
        return avg_loss.item(), accuracy.item()

    def validation_step(self, val_batch, batch_idx):
        """Return per-sample losses and correctness flags for one batch."""
        return self._eval_step(val_batch)

    def validation_epoch_end(self, outputs):
        """Log and return the validation loss and accuracy.

        Returns:
            dict with 'progress_bar' (both metrics) and 'valid_loss'.
        """
        avg_loss, accuracy = self._epoch_metrics(outputs)

        run['valid/loss'].log(avg_loss)
        # Series name kept as originally spelled so existing runs line up.
        run['valid/acuracy'].log(accuracy)

        metrics = {'valid_loss': avg_loss, 'accuracy': accuracy}
        output = {'progress_bar': metrics, 'valid_loss': avg_loss}
        return output

    def test_step(self, val_batch, batch_idx):
        """Return per-sample losses and correctness flags for one test batch."""
        return self._eval_step(val_batch)

    def test_epoch_end(self, outputs):
        """Log and return the test loss and accuracy."""
        avg_loss, accuracy = self._epoch_metrics(outputs)

        # Test metrics get their own series; they used to be appended to the
        # validation series, mixing the two curves.
        run['test/loss'].log(avg_loss)
        run['test/accuracy'].log(accuracy)
        metrics = {'Test loss': avg_loss, 'test accuracy': accuracy}
        output = {'progress_bar': metrics}
        return output

    def configure_optimizers(self):
        """Return ``([optimizer], [scheduler])``."""
        optimizer = torch.optim.Adam(self.model.parameters(), lr=self.hparams['learning_rate'])
        # Dummy scheduler (factor 1.0), kept because the original PyTorch
        # Lightning requires one.
        scheduler = torch.optim.lr_scheduler.MultiplicativeLR(optimizer, lr_lambda=lambda epoch: 1.0)
        return [optimizer], [scheduler]

## Inicialização dos parâmetros

In [None]:
# Training hyperparameters: number of epochs given to the Trainer and the
# learning rate given to the optimizer.
hparams = dict(
    max_epochs=10,
    learning_rate=0.001,
)

## Treinamento

In [None]:
# Ask PyTorch to release its unused cached GPU memory before the model is built.
torch.cuda.empty_cache()
# Build the classifier and the trainer from the shared hyperparameters.
pl_model = LightningClassifier(hparams=hparams)
trainer = Trainer(max_epochs=hparams['max_epochs'])
# Train on the training loader, passing the validation loader for per-epoch validation.
trainer.fit(pl_model, train_dataloader, val_dataloader)

Using cuda:0
Epoch 0 - {'valid_loss': 0.6658190488815308, 'accuracy': 0.7680999636650085}
Epoch 1 - {'valid_loss': 0.5999671816825867, 'accuracy': 0.795799970626831}
Epoch 2 - {'valid_loss': 0.560926616191864, 'accuracy': 0.8050999641418457}
Epoch 3 - {'valid_loss': 0.5577221512794495, 'accuracy': 0.8068999648094177}
Epoch 4 - {'valid_loss': 0.5769901871681213, 'accuracy': 0.8069999814033508}
Epoch 5 - {'valid_loss': 0.5344492793083191, 'accuracy': 0.8136000037193298}
Epoch 6 - {'valid_loss': 0.5423210263252258, 'accuracy': 0.8127999901771545}


Exception in thread Thread-13:
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/neptune/new/internal/backends/utils.py", line 51, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.7/dist-packages/neptune/new/internal/backends/hosted_neptune_backend.py", line 229, in ping_run
    self.leaderboard_client.api.ping(experimentId=str(run_uuid)).response().result
  File "/usr/local/lib/python3.7/dist-packages/bravado/http_future.py", line 239, in response
    six.reraise(*sys.exc_info())
  File "/usr/local/lib/python3.7/dist-packages/six.py", line 703, in reraise
    raise value
  File "/usr/local/lib/python3.7/dist-packages/bravado/http_future.py", line 200, in response
    swagger_result = self._get_swagger_result(incoming_response)
  File "/usr/local/lib/python3.7/dist-packages/bravado/http_future.py", line 124, in wrapper
    return func(self, *args, **kwargs)
  File "/usr/local/lib/python3.7/dist-packages/bravado/http_future.py", 

Epoch 7 - {'valid_loss': 0.5421552062034607, 'accuracy': 0.8118999600410461}


Error occurred during asynchronous operation processing: Timestamp must be non-decreasing for series attribute: train/batch_loss. Invalid point: 2021-05-06T02:17:07.682Z
Error occurred during asynchronous operation processing: Invalid point for string series: monitoring/stderr : Text longer than 1000 characters was truncated


Epoch 8 - {'valid_loss': 0.5111066102981567, 'accuracy': 0.8217999935150146}
Epoch 9 - {'valid_loss': 0.5041409730911255, 'accuracy': 0.8269000053405762}


## Teste

In [None]:
import os

# The simplified Trainer.fit saves the weights with the lowest validation
# loss to 'best_model.pt', but `pl_model` still holds the last-epoch weights.
# Restore that checkpoint before testing so the reported metrics belong to
# the selected model rather than to whichever epoch happened to run last.
# NOTE(review): this assumes the file on disk was written by the fit() call
# above and not left over from an earlier session.
if not pl_original and os.path.exists('best_model.pt'):
    pl_model.model.load_state_dict(torch.load('best_model.pt', map_location='cpu'))

trainer.test(pl_model, test_dataloader)

{'progress_bar': {'Test loss': 0.5029945969581604,
  'test accuracy': 0.8279999494552612}}