In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import torch
import os
from skimage import io, transform
from torch import nn, optim
from torch.nn import functional as F
from torchvision import datasets, transforms
from torch.autograd import Variable
from torchvision.utils import save_image
from torchsummary import summary
import pandas as pd
import time
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
from emb_descricoes import preprocess

# Word-vector size. NOTE(review): not referenced by any other visible cell;
# presumably it matches embedding_matrix.shape[1] - confirm.
EMBEDDING_DIM = 300
# `train` prints a progress line every LOG_INTERVAL batches.
LOG_INTERVAL = 40 
# Token sequences are truncated to at most MAX_LEN entries before batching.
MAX_LEN = 200
# Batch size shared by the training and validation data loaders.
BATCH_SIZE = 16

# K is a single scale factor applied to every model width below.
K = 1
# NOTE(review): ENC_EMB_DIM / DEC_EMB_DIM are not passed to the model in the
# visible cells (Encoder/Decoder receive the embedding tensor instead).
ENC_EMB_DIM = K*32
DEC_EMB_DIM = K*32
# Hidden sizes handed to Encoder, Attention and Decoder.
ENC_HID_DIM = K*64
DEC_HID_DIM = K*64
# Third constructor argument of Attention.
ATTN_DIM = K*8
# Dropout values handed to Encoder and Decoder respectively.
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5

Using TensorFlow backend.


In [2]:
# Load everything produced by the project-local `preprocess` helper. Of these,
# `retratos_idx` and `resto_idx` are later split into train/validation sets and
# `index_word` is used to turn token ids back into words.
embedding_matrix, portinari_idx, retratos_idx, resto_idx, word_index, index_word = preprocess(True)
no_cuda = False
seed = 1
# Use the GPU only when it is both allowed and available.
cuda = not no_cuda and torch.cuda.is_available()
device = torch.device("cuda" if cuda else "cpu")
# NOTE(review): `kwargs` is not passed to any DataLoader in the visible cells.
kwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {}
# Seed once, after preprocessing, so that everything downstream starts from a
# known RNG state. NumPy's global generator is seeded as well because
# scikit-learn falls back to it when no `random_state` is given.
torch.manual_seed(seed)
np.random.seed(seed)

class PortinariDesc(Dataset):
    """Thin `Dataset` wrapper around an indexable, sized collection.

    Items are returned exactly as stored; no transformation is applied.
    """

    def __init__(self, data) -> None:
        """Keep a reference to `data` (anything supporting `len()` and indexing)."""
        super().__init__()
        self.data = data

    def __len__(self):
        """Number of stored items."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the item stored at position `index`."""
        return self.data[index]
    
# Defines the dataset wrapper (dataset instances are created further below)


Achou 526 retratos




total words 571096
null words 2376
0.42 % de palavras não encontradas no emb


In [3]:
from sklearn import model_selection
# 80/20 split of each group, done separately so both groups appear in train and
# validation. `random_state` pins the shuffle so the split is the same on every
# run (previously it changed on each execution).
train_retratos, val_retratos = model_selection.train_test_split(retratos_idx, test_size = 0.2, shuffle = True, random_state = seed)
train_resto, val_resto = model_selection.train_test_split(resto_idx, test_size = 0.2, shuffle = True, random_state = seed)


# Concatenate the two groups (assumes both are Python lists - TODO confirm).
# NOTE: the name `train` is later rebound to the training function.
train = train_retratos + train_resto
val = val_retratos + val_resto

In [4]:

# Clip every token sequence to at most MAX_LEN entries.
train = list(map(lambda sent: sent[:MAX_LEN], train))
val = list(map(lambda sent: sent[:MAX_LEN], val))

In [5]:
# One tensor per (already truncated) token sequence.
train_tensor = [torch.tensor(sent) for sent in train]
val_tensor = [torch.tensor(sent) for sent in val]

# Wrap the tensor lists so a DataLoader can index them.
train_dataset = PortinariDesc(train_tensor)
val_dataset = PortinariDesc(val_tensor)

def pad_collate(batch):
    """Collate variable-length sequences into one zero-padded, batch-first tensor.

    Args:
        batch: sequence of tensors to be padded together.

    Returns:
        A pair `(padded, lengths)`: `padded` has the batch as its first
        dimension and uses 0 as the padding value; `lengths` is a Python list
        holding the original length of each sequence, in batch order.
    """
    lengths = [len(seq) for seq in batch]
    padded = pad_sequence(batch, batch_first=True, padding_value=0)
    return padded, lengths

#embedding_torch = nn.Embedding(embedding_matrix.shape[0], embedding_matrix.shape[1])

In [6]:
# Disabled experiment kept as an inert string literal (nothing here executes;
# the notebook only echoes the string). The quoted code would label as '<unk>'
# every embedding row whose difference from the last row sums to zero, and
# remap words whose embedding sums to zero to the last index.
'''unknown_tokens = [i for i in range(len(embedding_matrix)) if np.sum(embedding_matrix[i]-embedding_matrix[-1]) == 0]
for tk in unknown_tokens:
    index_word[tk] = '<unk>'
for w, idx in word_index.items():
    if np.sum(embedding_matrix[idx])==0:
        word_index[w] = len(embedding_matrix)-1
palavras_desconhecidas = [w for w, idx in word_index.items()  if np.sum(embedding_matrix[idx]-embedding_matrix[-1]) == 0]
len(palavras_desconhecidas)'''

"unknown_tokens = [i for i in range(len(embedding_matrix)) if np.sum(embedding_matrix[i]-embedding_matrix[-1]) == 0]\nfor tk in unknown_tokens:\n    index_word[tk] = '<unk>'\nfor w, idx in word_index.items():\n    if np.sum(embedding_matrix[idx])==0:\n        word_index[w] = len(embedding_matrix)-1\npalavras_desconhecidas = [w for w, idx in word_index.items()  if np.sum(embedding_matrix[idx]-embedding_matrix[-1]) == 0]\nlen(palavras_desconhecidas)"

In [7]:
# Embedding matrix as a tensor on the compute device; it is handed to both the
# encoder and the decoder.
embedding_tensor = torch.Tensor(embedding_matrix).to(device)


# Training batches are reshuffled every epoch; the last incomplete batch is dropped.
train_data_loader = torch.utils.data.DataLoader(
    train_dataset, drop_last=True,
    batch_size = BATCH_SIZE, shuffle=True, collate_fn=pad_collate)

# Validation is NOT shuffled: combined with drop_last=True, shuffling made each
# epoch score a different random subset, so validation losses were not
# comparable between epochs. With a fixed order the same samples are evaluated
# every time.
# NOTE(review): drop_last=True is kept in case the model relies on full
# batches; if it does not, drop_last=False would evaluate every sample.
val_data_loader = torch.utils.data.DataLoader(
    val_dataset, drop_last=True,
    batch_size = BATCH_SIZE, shuffle=False, collate_fn=pad_collate)

In [8]:
import random
from typing import Tuple

import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch import Tensor

from text_models import Encoder, Decoder, Attention, Seq2Seq

In [9]:
# Disabled alternative (larger) hyper-parameter set, kept as an inert string
# literal; none of these assignments execute.
'''ENC_EMB_DIM = 256
DEC_EMB_DIM = 256
ENC_HID_DIM = 512
DEC_HID_DIM = 512
ATTN_DIM = 64
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5'''



# Assemble the model from the project-local `text_models` classes.
# Encoder and decoder both receive the same embedding tensor; the decoder also
# receives the attention module.
enc = Encoder(embedding_tensor, ENC_HID_DIM, DEC_HID_DIM, ENC_DROPOUT).to(device)
attn = Attention(ENC_HID_DIM, DEC_HID_DIM, ATTN_DIM).to(device)
dec = Decoder(embedding_tensor, ENC_HID_DIM, DEC_HID_DIM, DEC_DROPOUT, attn).to(device)
model = Seq2Seq(enc, dec, device).to(device)

In [10]:

def init_weights(m: nn.Module):
    """Re-initialise the trainable parameters of `m` (including sub-modules).

    Parameters whose name contains ``'weight'`` are drawn from N(0, 0.01^2);
    every other parameter (e.g. biases) is set to zero.

    Frozen parameters (``requires_grad == False``) are left untouched. They are
    excluded from training, so overwriting them - for example a pretrained
    embedding table loaded with ``freeze=True`` - would silently and
    permanently replace their values with noise.

    Intended for use with ``model.apply(init_weights)``.
    """
    for name, param in m.named_parameters():
        if not param.requires_grad:
            # Frozen (e.g. pretrained) values must survive initialisation.
            continue
        if 'weight' in name:
            nn.init.normal_(param.data, mean=0, std=0.01)
        else:
            nn.init.constant_(param.data, 0)

# `Module.apply` invokes `init_weights` on every sub-module and on the model itself.
model.apply(init_weights)
# Emits a blank line as the cell's only output.
print("")




In [11]:

# Adam over all model parameters.
# NOTE(review): lr=0.05 is 50x Adam's default of 1e-3; the validation loss in
# the recorded run goes up and down between epochs - consider confirming this
# learning rate is intended.
optimizer = optim.Adam(model.parameters(), lr=0.05)

In [12]:

def count_parameters(model: nn.Module):
    """Return the total number of scalar entries in the trainable parameters of `model`.

    Parameters with ``requires_grad == False`` are not counted.
    """
    total = 0
    for tensor in model.parameters():
        if tensor.requires_grad:
            total += tensor.numel()
    return total


# Report the trainable-parameter count with thousands separators.
print(f'The model has {count_parameters(model):,} trainable parameters')


The model has 4,429,283 trainable parameters


In [13]:
# Targets equal to 0 are excluded from the loss; 0 is the padding value used by `pad_collate`.
criterion = nn.CrossEntropyLoss(ignore_index=0)

In [14]:
# Disabled single-batch smoke test kept as an inert string literal (nothing
# here executes). NOTE(review): it refers to `data_loader`, a name that is not
# defined in the visible cells (the loaders are `train_data_loader` /
# `val_data_loader`).
'''import math
import time

(x,x_l) = next(iter(data_loader))
x = x.to(device)
x_l = torch.Tensor(x_l).to(device)
output = model(x.permute(1,0),x_l)'''

'import math\nimport time\n\n(x,x_l) = next(iter(data_loader))\nx = x.to(device)\nx_l = torch.Tensor(x_l).to(device)\noutput = model(x.permute(1,0),x_l)'

In [15]:
# Number of batches the training loader yields per epoch.
len(train_data_loader)

226

In [16]:
import math
import time

def train(model: nn.Module,
          iterator: DataLoader,
          optimizer: optim.Optimizer,
          criterion: nn.Module,
          clip: float,
          epoch):
    """Run one training epoch and return the mean per-batch loss.

    Each batch is expected to be a ``(padded_tokens, lengths)`` pair with
    ``padded_tokens`` two-dimensional. The model is called on the transposed
    token tensor and asked to reproduce it: the same transposed tokens,
    flattened, are used as the target.

    Args:
        model: network called as ``model(tokens, lengths)``.
        iterator: data loader yielding the batches.
        optimizer: optimizer stepped once per batch.
        criterion: loss applied to the flattened outputs and targets.
        clip: maximum gradient norm passed to ``clip_grad_norm_``.
        epoch: epoch number, used only in the progress message.

    Returns:
        Sum of the batch losses divided by ``len(iterator)``.

    Relies on the module-level names ``device`` and ``LOG_INTERVAL``.
    """
    epoch_loss = 0
    start = time.time()

    model.train(True)

    for batch_idx, (x, x_l) in enumerate(iterator):

        x = x.to(device)
        x_l = torch.Tensor(x_l).to(device)

        # Clear gradients left over from the previous step before doing any work.
        optimizer.zero_grad()

        # The model is fed the transposed batch (dimensions 0 and 1 swapped).
        output = model(x.permute(1, 0), x_l)

        # Flatten every position into one row of scores / one target id.
        flat_output = output.view(-1, output.shape[-1])
        target = x.permute(1, 0).contiguous().view(-1)

        loss = criterion(flat_output, target)

        loss.backward()

        # Bound the gradient norm before the update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)

        optimizer.step()

        epoch_loss += loss.item()

        if batch_idx % LOG_INTERVAL == 0:
            # Time elapsed since the previous progress line (or since the start).
            interval = time.time() - start
            start = time.time()

            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss : {:.6f} \tTime Interv: {:.6f}'.format(
                epoch, batch_idx * len(x), len(iterator.dataset),
                100. * batch_idx / len(iterator),
                loss.item(), interval))

    return epoch_loss / len(iterator)

In [17]:
def evaluate(model: nn.Module,
             iterator: DataLoader,
             criterion: nn.Module):
    """Compute the mean per-batch loss of `model` over `iterator` without updating it.

    The model is switched to evaluation mode and gradients are disabled. Each
    batch is a ``(padded_tokens, lengths)`` pair; the model is called with the
    transposed tokens, the lengths and a third argument of 0 (presumably the
    teacher-forcing ratio of the project's Seq2Seq - TODO confirm).

    Returns:
        Sum of the batch losses divided by ``len(iterator)``.

    Relies on the module-level name ``device``.
    """
    model.eval()

    running_loss = 0

    with torch.no_grad():
        for tokens, lengths in iterator:
            tokens = tokens.to(device)
            lengths = torch.Tensor(lengths).to(device)

            scores = model(tokens.permute(1, 0), lengths, 0)

            # One row of scores and one target id per position.
            scores_2d = scores.view(-1, scores.shape[-1])
            expected = tokens.permute(1, 0).contiguous().view(-1)

            batch_loss = criterion(scores_2d, expected)
            running_loss += batch_loss.item()

            # Drop the batch tensors and release cached GPU memory after every batch.
            del tokens
            del lengths
            torch.cuda.empty_cache()

    return running_loss / len(iterator)

In [None]:
def epoch_time(start_time: int,
               end_time: int):
    """Split the time between two timestamps into whole minutes and whole seconds.

    Args:
        start_time: starting timestamp, in seconds.
        end_time: ending timestamp, in seconds.

    Returns:
        ``(minutes, seconds)`` as integers, both truncated toward zero.
    """
    total_seconds = end_time - start_time
    whole_minutes = int(total_seconds / 60)
    leftover_seconds = int(total_seconds - 60 * whole_minutes)
    return whole_minutes, leftover_seconds


N_EPOCHS = 20
# Maximum gradient norm used by `train`.
CLIP = 1

# Lowest validation loss seen so far and a copy of the weights that produced it.
# Previously `best_valid_loss` was initialised but never updated, so the best
# epoch was neither tracked nor recoverable.
best_valid_loss = float('inf')
best_state_dict = None

for epoch in range(N_EPOCHS):

    start_time = time.time()

    train_loss = train(model, train_data_loader, optimizer, criterion, CLIP, epoch)
    valid_loss = evaluate(model, val_data_loader, criterion)

    end_time = time.time()

    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        # Clone the tensors: `state_dict()` returns references that later
        # optimizer steps would otherwise overwrite. `model` itself is left at
        # the latest epoch; restore with model.load_state_dict(best_state_dict).
        best_state_dict = {name: tensor.detach().clone()
                           for name, tensor in model.state_dict().items()}

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    # Perplexity is reported as exp(mean loss).
    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}')

Epoch: 01 | Time: 3m 6s
	Train Loss: 5.855 | Train PPL: 348.991
	 Val. Loss: 5.931 |  Val. PPL: 376.386
Epoch: 02 | Time: 3m 4s
	Train Loss: 5.103 | Train PPL: 164.575
	 Val. Loss: 6.066 |  Val. PPL: 430.898
Epoch: 03 | Time: 3m 7s
	Train Loss: 4.913 | Train PPL: 135.999
	 Val. Loss: 5.846 |  Val. PPL: 345.893


In [39]:
# Run one training batch through the model so its predictions can be inspected.
# NOTE(review): the model is in training mode here and the call is not wrapped
# in `torch.no_grad()` - confirm this is intended for a qualitative check.
model.train()
(x,x_l) = next(iter(train_data_loader))
x = x.to(device)
x_l_tens = torch.Tensor(x_l).to(device)
# The third argument is 0.5 here versus 0 in `evaluate`; presumably the
# teacher-forcing ratio of the project's Seq2Seq - TODO confirm.
output = model(x.permute(1,0),x_l_tens, 0.5)

In [40]:
# Highest-scoring index along the last dimension at every position, with the
# first two dimensions swapped back, as nested Python lists.
out = output.argmax(-1).transpose(1,0).cpu().numpy().tolist()

In [41]:
# The padded input batch as nested Python lists of token ids.
inp = x.cpu().numpy().tolist()

In [42]:
def _ids_to_words(sentences):
    """Translate nested lists of token ids into words via `index_word`; id 0 becomes 'NUll'."""
    decoded = []
    for sentence in sentences:
        decoded.append(['NUll' if token == 0 else index_word[token] for token in sentence])
    return decoded


# Word-level views of the input batch and of the model's predictions.
inp_text = _ids_to_words(inp)
out_text = _ids_to_words(out)

In [43]:
# x_l[0] is the unpadded length of the first sentence, so this slice shows
# exactly its tokens (the previous `+1` reached one position past the end).
inp_text[0][:x_l[0]]

['composição',
 'em',
 'tons',
 'escuros',
 'de',
 'preto',
 'cinzas',
 'e',
 'terras',
 'e',
 'nos',
 'tons',
 'branco',
 'ocre',
 'rosa',
 'vermelho',
 'azul',
 'claro',
 'textura',
 'lisa',
 'retrato',
 'de',
 'mulher',
 'contra',
 'fundo',
 'cinza',
 'dégradé',
 'retratada',
 'está',
 'de',
 'frente',
 'meio',
 'corpo',
 'com',
 'rosto',
 'quase',
 '<unk>',
 '<unk>',
 'a',
 'esquerda',
 'tem',
 'cabelos',
 'curtos',
 'e',
 'escuros',
 'repartidos',
 'a',
 'direita',
 'e',
 'enrolados',
 'em',
 'pequenos',
 'cachos',
 'de',
 'onde',
 'saem',
 'as',
 'pontas',
 'dos',
 'papelotes',
 'sobrancelhas',
 'são',
 'finas',
 'pretas',
 'e',
 'retas',
 'olhos',
 'escuros',
 'pequenos',
 'e',
 'amendoados',
 'nariz',
 'fino',
 'e',
 'longo',
 'lábios',
 'fechados',
 'pintados',
 'de',
 'vermelho',
 'queixo',
 'fino',
 'rosto',
 'magro',
 'e',
 'comprido',
 'com',
 'toques',
 'de',
 'rosa',
 'nas',
 'faces',
 'luz',
 'incidindo',
 'a',
 'direita',
 'e',
 'iluminando',
 'sua',
 'face',
 'esquerd

In [44]:
# Predictions for the first sentence, limited to its unpadded length x_l[0]
# (the previous `+1` reached one position past the end).
out_text[0][:x_l[0]]

['composição',
 'nos',
 'preto',
 'preto',
 'e',
 'branco',
 'e',
 'branco',
 'linhas',
 'de',
 'e',
 'e',
 'de',
 'de',
 'de',
 'de',
 'de',
 'de',
 'de',
 'de',
 'de',
 'de',
 'de',
 'a',
 'fundo',
 'de',
 'de',
 'de',
 'a',
 'de',
 'a',
 'de',
 'de',
 'e',
 'a',
 'de',
 'a',
 'de',
 'de',
 'a',
 'e',
 'de',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'de',
 'e',
 'e',
 'e',
 'e',
 'e',
 'de',
 'a',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'a',
 'e',
 'e',
 'de',
 'a',
 'e',
 'e',
 'e',
 'e',
 'a',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'de',
 'a',
 'e',
 'e',
 'e',
 'e',
 'de',
 'de',
 'e',
 'a',
 'a',
 'de',
 'a',
 'esquerda',
 'e',
 'de',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'de',
 'a',
 'a',
 'esquerda',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e',
 'e']