In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import torch
import os
from skimage import io, transform
from torch import nn, optim
from torch.nn import functional as F
from torchvision import datasets, transforms
from torch.autograd import Variable
from torchvision.utils import save_image
from skimage.io import imread
from torchsummary import summary
import pandas as pd
import time
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
from emb_descricoes import preprocess

EMBEDDING_DIM = 300

Using TensorFlow backend.


In [2]:
embedding_matrix, retratos_idx, word_index, index_word = preprocess()
no_cuda = False
seed = 1
cuda = not no_cuda and torch.cuda.is_available()
torch.manual_seed(seed)
device = torch.device("cuda" if cuda else "cpu")
kwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {}
torch.manual_seed(seed)

class PortinariDesc(Dataset):
    def __init__(self, data) -> None:
        super().__init__()
        self.data = data

    def __getitem__(self, index):
        return self.data[index]

    def __len__(self):
        return len(self.data)
    
#cria o dataset
retratos_idx_tensor = list(map(lambda x: torch.tensor(x), retratos_idx))
retratos_dataset = PortinariDesc(retratos_idx_tensor)

def pad_collate(batch):
    xx = batch
    x_lens = list(map(len, xx))

    xx_pad = pad_sequence(xx, batch_first=True, padding_value=0)

    return xx_pad, x_lens

embedding_torch = nn.Embedding(embedding_matrix.shape[0], embedding_matrix.shape[1])

Achou 526 retratos
1.63 % de palavras não encontradas no emb




In [3]:
torch.Tensor(embedding_matrix)

tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0878,  0.1001, -0.2374,  ...,  0.2434,  0.4280,  0.3246],
        [ 0.2822, -0.6084, -0.3565,  ..., -0.0978, -0.0218, -0.3524],
        ...,
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.3439,  0.2531, -0.0384,  ...,  0.4132,  0.0883, -0.4400],
        [-0.2320, -0.3705, -0.0854,  ...,  0.1377, -0.0410, -0.6524]])

In [4]:
data_loader = torch.utils.data.DataLoader(
    retratos_dataset,
    batch_size = 10, shuffle=False, collate_fn=pad_collate)


In [5]:
#from text_models import *
class AE_LSTM(nn.Module):
    def __init__(self, nb_lstm_layers, embedding_matrix, nb_lstm_units=100, embedding_dim=300, nb_vocab_words = 3188, batch_size=32):
        super(AE_LSTM, self).__init__()

        self.nb_lstm_layers = nb_lstm_layers
        self.nb_lstm_units = nb_lstm_units
        self.embedding_dim = embedding_dim
        self.batch_size = batch_size
        self.nb_vocab_words = nb_vocab_words
        self.embedding_matrix = embedding_matrix

        # when the model is bidirectional we double the output dimension
        self.lstm = nn.LSTM

        # build actual NN
        
        self.__build_model()

    def __build_model(self):

        # whenever the embedding sees the padding index it'll make the whole vector zeros
        '''self.word_embedding = nn.Embedding(
            num_embeddings=self.nb_vocab_words,
            embedding_dim=self.embedding_dim
        )'''
        
        self.word_embedding = nn.Embedding.from_pretrained(
            embeddings=self.embedding_matrix
        )          

        # design LSTM
        self.lstm_enc = nn.LSTM(
            input_size=self.embedding_dim,
            hidden_size=self.nb_lstm_units,
            num_layers=self.nb_lstm_layers,
            batch_first=True,
        )
        
        self.lstm_dec = nn.LSTM(
            input_size=self.nb_lstm_units,
            hidden_size=self.nb_lstm_units,
            num_layers=self.nb_lstm_layers,
            batch_first=True,
        )

    def init_hidden(self):
        # the weights are of the form (nb_layers, batch_size, nb_lstm_units)
        hidden_a = torch.randn(self.nb_lstm_layers, self.batch_size, self.nb_lstm_units)
        hidden_b = torch.randn(self.nb_lstm_layers, self.batch_size, self.nb_lstm_units)
        '''
        if self.on_gpu:
            hidden_a = hidden_a.cuda()
            hidden_b = hidden_b.cuda()'''

        hidden_a = Variable(hidden_a)
        hidden_b = Variable(hidden_b)

        return (hidden_a, hidden_b)
    
    
    def encode(self, X, X_lengths):
        
        self.hidden_enc = self.init_hidden()

        batch_size, seq_len = X.size()
        #print(X.size())
        #print(batch_size)

        X = self.word_embedding(X)
        X = torch.nn.utils.rnn.pack_padded_sequence(X, X_lengths, batch_first=True, enforce_sorted=False)

        Z, self.hidden_enc = self.lstm_enc(X, self.hidden_enc)
        Z, _ = torch.nn.utils.rnn.pad_packed_sequence(Z, batch_first=True)
        
        timesteps = torch.tensor(Z.shape)[1]
        
        return Z[:,-1:,:].repeat((1,timesteps,1))

    
    def decode(self, Z, Z_lengths):
        
        self.softmax = nn.LogSoftmax(dim=1)
        self.hidden_dec = self.init_hidden()
        self.decoder_output = nn.Linear(self.nb_lstm_units, self.nb_vocab_words) 
        
        batch_size, seq_len, _ = Z.size()
        X_hat = torch.nn.utils.rnn.pack_padded_sequence(Z, Z_lengths, batch_first=True, enforce_sorted=False)

        print(batch_size)
        print(seq_len)
        
        X_hat, self.hidden_dec = self.lstm_dec(X_hat, self.hidden_dec)
        X_hat, _ = torch.nn.utils.rnn.pad_packed_sequence(X_hat, batch_first=True)
        X_hat = F.log_softmax(self.decoder_output(X_hat), dim = -1)

        return X_hat

In [6]:
class customLoss(nn.Module):
    def __init__(self):
        super(customLoss, self).__init__()
        self.nn_loss = nn.NLLLoss(reduction='mean', ignore_index=0)
    def forward(self, x_rec, x):
        x_rec = x_rec.view(-1, x_rec.shape[-1]) 
        x = x.view(-1)
        mask = (x > 0).float()
        nb_tokens = int(torch.sum(mask))
        #x_rec = x_rec[range(x_rec.shape[0]), x]
        log_loss = self.nn_loss(x_rec, x)
        #log_loss1 = mask * log_loss1
        #log_loss1 = torch.sum(log_loss1)/nb_tokens
        return log_loss
    
loss_nll = customLoss()

In [35]:
'''zero_axis_emb = np.sum(embedding_matrix, axis=1) == 0
zero_axis = [i for i in range(len(zero_axis_emb)) if zero_axis_emb[i] == True]'''
#from text_models import AE_LSTM
model = AE_LSTM(nb_lstm_layers = 1, nb_lstm_units = 222, 
                  embedding_matrix = torch.Tensor(embedding_matrix),
                  embedding_dim = 300, nb_vocab_words = 3188, batch_size = 10).to(device)
optimizer = optim.RMSprop(model.parameters(), lr=0.001, alpha= 0.9)

In [27]:
palavras_ausentes = [[(index_word[token]) for token in comment  if np.sum(embedding_matrix[token]) == 0] for comment in retratos_idx] 
#palavras_ausentes

In [28]:
x = next(iter(data_loader))

In [29]:
z = model.encode(x[0], x[1])
x_rec = model.decode(z, x[1])


10
146


In [19]:
x_rec.view(-1, x_rec.shape[-1]).shape

torch.Size([1460, 3188])

In [20]:
loss_nll = customLoss()
loss = loss_nll(x_rec, x[0])

In [21]:
loss

tensor(8.0644, grad_fn=<NllLossBackward>)

In [33]:
def train(epoch):
    epoch_start = 0
    model.train(True)
    train_loss = 0
    start = time.time()
    for batch_idx, (x, x_lengths) in enumerate(data_loader): 
        x = x.to(device)
        optimizer.zero_grad()
        z = model.encode(x, x_lengths)
        x_rec = model.decode(z, x_lengths)
        loss = loss_nll(x_rec, x)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()
        
        if batch_idx % LOG_INTERVAL == 0:
            interval = time.time() - start
            start = time.time()
            epoch_start = epoch_start + interval
            
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f \tTime Interv: {:.6f}'.format(
                epoch, batch_idx * len(data), len(data_loader.dataset),
                       100. * batch_idx / len(train_loader_celeba),
                       loss.item(), interval))

    print('====> Epoch: {} Average loss: {:.6f} Elapsed Time: {:.6f}'.format(
        epoch, train_loss * BATCH_SIZE / len(train_loader_celeba.dataset), epoch_start))
    train_losses.append(train_loss*BATCH_SIZE/ len(train_loader_celeba.dataset))


In [34]:
train(1)

RuntimeError: Expected object of backend CPU but got backend CUDA for argument #3 'index'