<a href="https://colab.research.google.com/github/finardi/IA376J/blob/master/Aula1%20-%20classifica%C3%A7%C3%A3o%20COCO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Aula 1
> #### Paulo Ricardo Finardi

In [1]:
!nvidia-smi

Mon Sep 21 17:15:29 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.66       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   43C    P8     9W /  70W |      0MiB / 15079MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
# basic
import json
import h5py
import torch
import numpy as np
import pandas as pd
from collections import Counter, OrderedDict

# sklearn
from sklearn.metrics import f1_score

# torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, TensorDataset
from torch.utils.data import SequentialSampler, RandomSampler

# Preparação dos dados e criação do Dataset

In [3]:
path = '/content/drive/My Drive/Colab Notebooks/IA376J/Aula1/'
train_path_labels = path+'TRAIN_CAPTIONS_coco_5_cap_per_img_5_min_word_freq.json'
train_path_imgs = path+'TRAIN_IMAGES_coco_5_cap_per_img_5_min_word_freq.hdf5'

val_path_labels = path+'VAL_CAPTIONS_coco_5_cap_per_img_5_min_word_freq.json'
val_path_imgs = path+'VAL_IMAGES_coco_5_cap_per_img_5_min_word_freq.hdf5'

test_path_labels = path+'TEST_CAPTIONS_coco_5_cap_per_img_5_min_word_freq.json'
test_path_imgs = path+'TEST_IMAGES_coco_5_cap_per_img_5_min_word_freq.hdf5'

word_map_path = path+'WORDMAP_coco_5_cap_per_img_5_min_word_freq.json'

In [4]:
class Dataset_Aula1(Dataset):
    def __init__(self, path_h5, path_labels, path_word_map):

        self.images = h5py.File(path_h5, 'r')
        self.labels = self._get_label(path_labels, path_word_map)
        
    def _get_label(self, path_label, path_wrd_map):

        # Lê o arquivo json das labels (ints)
        with open(path_label, "rb") as handle:
            raw_labels = json.loads(handle.read())

        # Lê o mapeamento dos rotulos das labels (str)
        with open(path_wrd_map, "rb") as handle:
            word_map = json.loads(handle.read())

        # Lista que armazena as labels
        labels = []
        # Para cada sample in labels (cada img possui 5 labels, logo [::5])
        for sample in raw_labels[::5]:
            # Anexa a label somente se for um rótulo não 0, não start e não end
            # subtrai 1 do i para os labels ficarem de 0 até 9 
            labels.append([i-1 for i in sample if i != 0 
                           and i != word_map['<start>'] 
                           and i != word_map['<end>']])
                
        # Se a quanditade de imagens é != do numero de labels
        assert self.images['images'].shape[0] == len(raw_labels[::5])
        return labels

    def __len__(self):
        return self.images['images'].shape[0]
        
    def __getitem__(self, idx):
        assert idx >= 0, f'ERRO idx {idx}' 
        if idx >= self.__len__():
            raise IndexError()

        images = self.images['images'][idx] # shape=CHW
        images = torch.from_numpy(images).type(torch.FloatTensor)
        
        # Labels é a forma de lista com 1 elemento
        labels = self.labels[idx]
        labels = torch.from_numpy(np.array(labels)).type(torch.LongTensor)

        return images, labels.squeeze()

#-------------------------------------------------------------------------

def dev_dataloader(ds, number_of_samples=10, batch_size=32):
    """ 
    Cria um pequeno dataloader com amostras do dataset de treino

    Retorna:
         dev_data_loader: dataLoader com k amostras do conj. de treino
     -------------------
     Argumentos:
         ds - dataset de treino
         number_of_samples - número de amostras
         batch_size - tamanho do batch
    """
    # K amostras serão utilizadas no dataloader    
    k = number_of_samples

    # Converte exemplos de imagem até k
    img = [ds[i][0] for i in range(k)]
    
    # Converte exemplos de labels até k
    label = [ds[i][1] for i in range(k)]
    
    # Se os tamanhos não são iguais é um erro
    assert len(img) == len(label), f'ERRO lenghts imgs/labels'
    
    # Converte a lista de imagens para tensor
    img = torch.tensor([t.numpy() for t in img])
    
    # Converte a lsita de labels para tensor
    label = torch.tensor(label)
    
    # Cria um TensorDataset e Dataloader
    dev_loader = DataLoader(
        TensorDataset(img, label),
        shuffle=True, 
        batch_size=batch_size)
    return dev_loader

#-------------------------------------------------------------------------

def check_qtde_labels(ds):
    """ 
    Confere o número de labels por classe

    Retorna:
         data: DataFrame com números de amostras por classe
     -------------------
     Argumentos:
         ds - dataset de treino
    """

    # Armazena os labels
    labels_hist = []
    # Para cada label no dataset de treino...
    for _, label in ds:
        # Anexa o label na lista
        labels_hist.append(label)
    
    # Lê o json do word_map (rótulos das classes)
    with open(word_map_path, "rb") as handle:
                word_map = json.loads(handle.read())
    
    # Dict com k: qtde de exemplos -> v: classe
    sample_class = dict({v:k for k,v in word_map.items()})
    
    # Dict com k: int da classe -> v: qtde de exemplos
    number_samples = dict(Counter(np.array(labels_hist).flatten()))
    
    # Dataframe com valores dos Dicts k: sample_class -> v: number_samples
    data = pd.DataFrame(list({v1:v2 for v1,v2 in 
                           zip(sample_class.values(),
                               number_samples.values())}.items()))
    
    # Renomeia as colunas para melhor interpretabilidade
    data = data.rename(columns={0:'Classe', 1:'Qtde-Exemplos'})

    return data

#-------------------------------------------------------------------------

# Cria o dataset de treino
ds = Dataset_Aula1(train_path_imgs, train_path_labels,  word_map_path)

# Testando a função que cria um dev_dataloader
dev_loader = dev_dataloader(ds, number_of_samples=100, batch_size=32)

#-------------------------------------------------------------------------

# Imprime o DataFramde da qtde de exemplos por classe
check_qtde_labels(ds)

Unnamed: 0,Classe,Qtde-Exemplos
0,cat,3124
1,truck,1518
2,boat,1452
3,bird,1043
4,house,1243
5,car,2093
6,airplane,2168
7,horse,1890
8,tree,2626
9,dog,3660


# Dataloaders

In [5]:
BATCH_SZ = 128

#-----------------------------------__DATASETS__-----------------------------------

# Dataset de treino
ds_train = Dataset_Aula1(train_path_imgs, train_path_labels,  word_map_path)

# Dataset de validação
ds_val = Dataset_Aula1(val_path_imgs, val_path_labels,  word_map_path)

# Dataset de test
ds_test = Dataset_Aula1(test_path_imgs, test_path_labels, word_map_path)

#---------------------------------__DATALOADERS__----------------------------------

# Dataloader é um dict com chaves train, val e test
dataloaders = {
        
      'train': DataLoader(
         ds_train,
         batch_size=BATCH_SZ,
         sampler = RandomSampler(ds_train), # seleciona batches randomicamente
         num_workers=4,
         pin_memory=True
         ),
               
     'val': DataLoader(
         ds_val,
         batch_size=BATCH_SZ,
         sampler = SequentialSampler(ds_val), # toma batches sequencialmente
         num_workers=4,
         pin_memory=True
         ),

    'test': DataLoader(
         ds_test,
         batch_size=BATCH_SZ,
         sampler = SequentialSampler(ds_test), # toma batches sequencialmente
         num_workers=4,
         pin_memory=True
         ),
     }

# Verificando os tamanhos dos dataloaders
_ = {x: len(dataloaders[x]) for x in dataloaders.keys()}
_

{'test': 8, 'train': 163, 'val': 8}

# Modelo

In [6]:
# Verifica qual é o device disponível 
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Para os experimentos serem determinísticos
manual_seed = 2357 # only primers ;)

def deterministic(rep=True):
    if rep:
        np.random.seed(manual_seed)
        torch.manual_seed(manual_seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed(manual_seed)
            torch.cuda.manual_seed_all(manual_seed)
        torch.backends.cudnn.enabled = False 
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True
        print(f'Experimento deterministico, seed: \
{manual_seed} -- ', end = '')
        print(f'Existe {torch.cuda.device_count()} GPU \
{torch.cuda.get_device_name(0)} disponível.')
    else:
        print('Experimento randomico')

deterministic()

Experimento deterministico, seed: 2357 -- Existe 1 GPU Tesla T4 disponível.


In [7]:
class SimpleCNN(nn.Module):
    def __init__(self, channels, h, w, output):
        super().__init__()

        self.ch = channels
        self.h = h
        self.w = w
        self.out = output
        
        self.conv_layer = nn.Sequential(OrderedDict([
            ('conv1', nn.Conv2d(3, self.ch, kernel_size=3, padding=1)),
            ('bn1', nn.BatchNorm2d(self.ch)),
            ('relu1', nn.ReLU()),
            ('maxpool1', nn.MaxPool2d(3, stride=2)),
            
            ('conv2', nn.Conv2d(self.ch, 2 * self.ch, kernel_size=5, padding=1)),
            ('bn2', nn.BatchNorm2d(2 * self.ch)),
            ('relu2', nn.ReLU()),
            ('maxpool2', nn.MaxPool2d(3, stride=2)),

            ('conv3', nn.Conv2d(2 * self.ch, 3 * self.ch, kernel_size=3, padding=1)),
            ('bn3', nn.BatchNorm2d(3 * self.ch)),
            ('relu3', nn.ReLU()),
            ('adaptative', nn.AdaptiveMaxPool2d((1, 1))),
        ]))
        
        self.dense_layer =  nn.Linear(3 * self.ch, self.out)
        
    def forward(self, x):
        N,_,_,_ = x.shape
        o = self.conv_layer(x)
        o = o.view(N, -1) 
        return self.dense_layer(o)

#---------------------------------------------------------------

x,y = next(iter(dev_loader))  # utiliza x,y do dev_loader

CH = 64 # numero de canais 
H = x[0].shape[1] # altura da imagem
W = H # a imagem é quadrada, logo h=w
OUT = 10 # numero de clases

# Testando a rede com 1 exemplo
model = SimpleCNN(CH, H, W, OUT)
model(x).shape

torch.Size([32, 10])

# Numero de Parâmetros

In [8]:
def count_parameters(model):
    # Conta o numero de parametros se o parametro precisa de gradiente
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print('\n','#' * 46,f'\n # The model has {count_parameters(model):,}' \
       ' trainable parameters #\n', '#' * 46,'\n' )  
model


 ############################################## 
 # The model has 430,794 trainable parameters #
 ############################################## 



SimpleCNN(
  (conv_layer): Sequential(
    (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu1): ReLU()
    (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv2): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1), padding=(1, 1))
    (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu2): ReLU()
    (maxpool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv3): Conv2d(128, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn3): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu3): ReLU()
    (adaptative): AdaptiveMaxPool2d(output_size=(1, 1))
  )
  (dense_layer): Linear(in_features=192, out_features=10, bias=True)
)

In [9]:
def train_model(model, loss_fn, train_loader, optimizer, device):
    # Coloca o modelo no modo de treinamento 
    model.train()

    # Lista que guarda a loss total do batch
    loss_train = []

    # Para cada batch (imagem e label)...
    for x, y in train_loader:

        # Zera os gradientes
        optimizer.zero_grad()

        # Computa as predições (saída da rede)
        y_pred = model(
            x.to(device),
            )
        
        # Calcula a loss
        loss = loss_fn(y_pred, y.to(device))
        
        # Anexa a loss na lista
        loss_train.append(loss.item())
    
        # Computa os gradientes
        loss.backward()
        
        # Otimiza o modelo 
        optimizer.step()
    
    # Calcula a média da loss para o batch
    ave_train_loss = sum(loss_train) / len(loss_train)
    return ave_train_loss

def test_model(model, loss_fn, valid_loader, device):
    # Colaca o modelo em modo de avaliação (desliga os dropouts)
    model.eval()
    
    # Listas para avaliação do modelo
    preds, trues, loss_test = [],[],[]
    
    # Para cada batch (imagem e label)...
    for x, y in valid_loader:
        
        # Não calcula nenhum gradiente
        with torch.no_grad():
            y_pred = model(
            x.to(device),
            )
        
        # Calcula a loss
        loss = loss_fn(y_pred, y.to(device))
        
        # Anexa a loss na lista
        loss_test.append(loss.item())

        # Calcula as predições e envia os labels para cpu
        preds += y_pred.argmax(-1).cpu().numpy().tolist()
        trues += y.cpu().numpy().tolist()

    # Calcula a média da loss para o batch
    ave_test_loss = sum(loss_test) / len(loss_test)

    # Calcula a acuracia    
    preds = np.array(preds)
    trues = np.array(trues)
    acc = (1.* (preds==trues)).mean() 
    # Calcula o f1 com weighted (considera o desbalanceamento do dataset)
    f1 = f1_score(trues, preds, average='weighted', zero_division=0)
    
    return ave_test_loss, acc, f1

# Overfit no batch dev_loader (100 amostras)

In [10]:
OverFit = True

if OverFit:
    model = SimpleCNN(CH, H, W, OUT).to(device)

    # Num. de épocas
    N_EPOCHS = 40

    # Otimizador
    optimizer = torch.optim.Adam(model.parameters(), lr = 2e-5)

    loss_fn = torch.nn.CrossEntropyLoss()

    # Para cada época...
    for epoch_i in range(1, N_EPOCHS+1):

        # Obtem a loss do dado de treino
        loss_train = train_model(model, loss_fn, 
                                dev_loader, optimizer, device)
        
        # Obtem a loss, acc e f1 do dado de validação
        loss_val, acc, f1 = test_model(model, loss_fn, 
                                    dev_loader, device)
        if epoch_i==1:
            print(f'Epoca [{epoch_i}/{N_EPOCHS}] |', end=' ')
            print(f'Loss Treino: {loss_train:.3f}  ---- Loss Valid: \
{loss_val:.3f} -- Acc Valid: {acc:.3} -- F1 Valid: {f1:.3}')
            
        if epoch_i%10==0:
            print(f'Epoca [{epoch_i}/{N_EPOCHS}] |', end=' ')
            print(f'Loss Treino: {loss_train:.3f}  ---- Loss Valid: \
{loss_val:.3f} -- Acc Valid: {acc:.3} -- F1 Valid: {f1:.3}')

    print(f'\nFEITO!')

Epoca [1/40] | Loss Treino: 3.775  ---- Loss Valid: 2.407 -- Acc Valid: 0.12 -- F1 Valid: 0.0613
Epoca [10/40] | Loss Treino: 1.660  ---- Loss Valid: 1.747 -- Acc Valid: 0.5 -- F1 Valid: 0.455
Epoca [20/40] | Loss Treino: 1.233  ---- Loss Valid: 1.173 -- Acc Valid: 0.76 -- F1 Valid: 0.715
Epoca [30/40] | Loss Treino: 1.009  ---- Loss Valid: 0.983 -- Acc Valid: 0.93 -- F1 Valid: 0.915
Epoca [40/40] | Loss Treino: 0.848  ---- Loss Valid: 0.681 -- Acc Valid: 0.98 -- F1 Valid: 0.978

FEITO!


# Treino Completo

In [11]:
# Contagem de tempo
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)

#----------------------------------------------------------------

# Path para salvar as epocas do modelo
path_save = '/content/drive/My Drive/Colab Notebooks/IA376J/Aula1/checkpoints/ch'


# Se True carrega um modelo salvo
CHECKPOINT = False
if CHECKPOINT:
    checkpoint = torch.load(path_save)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch = checkpoint['epoch']

else:
    model = SimpleCNN(CH, H, W, OUT).to(device)

# Para que os experimentos sejam deterministicos
deterministic()

# Num. de épocas
N_EPOCHS = 2

# Otimizador
optimizer = torch.optim.SGD(
    model.parameters(), 
    lr=5e-3, 
    momentum=0.9,
    weight_decay=5e-3
    )

loss = torch.nn.CrossEntropyLoss()

#----------------------------------------------------------------
start.record()

# Guarda as informações do treino
training_stats = []

# Para cada época...
for epoch_i in range(1, N_EPOCHS+1):

    # Obtem a loss do dado de treino
    loss_train = train_model(model, loss, 
                             dataloaders['train'], optimizer, device)
    
    # Obtem a loss e acc do dado de validação
    loss_val, acc, f1 = test_model(model, loss, 
                                        dataloaders['val'], device)
    
    print(f'Epoca [{epoch_i}/{N_EPOCHS}] |', end=' ')
    print(f'Loss Treino: {loss_train:.3f}  ---- Loss Valid: \
{loss_val:.3f} -- Acc Valid: {acc:.3} -- F1 Valid: {f1:.3}')
    
    # Salva o estado do modelo na época atual
    torch.save({
            'epoch': epoch_i,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            }, path_save)

    # Anexa as estatisticas da época de treinamento
    training_stats.append(
        {
            'epoch': epoch_i,
            'Training Loss': loss_train,
            'Valid Loss': loss_val,
            'Valid Acc': acc,
            'Valid F1': f1,
        }
    )

end.record()
torch.cuda.synchronize()    
#------------------------------------------------------------------
print(f'\nFEITO!')
print(f'Tempo gasto: {start.elapsed_time(end)/1000/60 :.3f} min.')

Experimento deterministico, seed: 2357 -- Existe 1 GPU Tesla T4 disponível.
Epoca [1/2] | Loss Treino: 2.197  ---- Loss Valid: 2.499 -- Acc Valid: 0.146 -- F1 Valid: 0.0981
Epoca [2/2] | Loss Treino: 1.863  ---- Loss Valid: 2.866 -- Acc Valid: 0.146 -- F1 Valid: 0.115

FEITO!
Tempo gasto: 11.681 min.


# Log do treino em DataFrame

In [12]:
df_stats = pd.DataFrame(data=training_stats)
df_stats = df_stats.set_index('epoch')
pd.set_option('precision', 3)
df_stats

Unnamed: 0_level_0,Training Loss,Valid Loss,Valid Acc,Valid F1
epoch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,2.197,2.499,0.146,0.098
2,1.863,2.866,0.146,0.115


# Avaliação

In [13]:
loss_test, acc, f1 = test_model(model, loss, dataloaders['test'], device)
print(f'Loss test {loss_test:.3f} -- Acc test: {acc:.3} -- F1 test: {f1:.3}')

Loss test 2.922 -- Acc test: 0.128 -- F1 test: 0.0983


## Diversas estratégias podem ser utilizadas para melhorar essa solução, dois  pontos chaves são:
1. #### Fazer aumento  e transformação dos dados de treinamento
2. #### Usar os pesos de alguma rede pré-treinada como `backbone`, exemplo: Resnet

# Fim
