In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from IPython.display import clear_output
import re
import os
import unicodedata
import numpy as np
from tqdm import tqdm
import time
import utils
%load_ext autoreload
%autoreload 2

os.environ["CUDA_VISIBLE_DEVICES"]="0"

### Dataset load and prepare

In [12]:
train_loader, valid_loader, vocab, word2id, id2word = utils.prepare_imdb_dataloaders(path_to_pkl='data/matrix.pkl',
                                                                            forward=True)

Data has been successfully loaded


In [3]:
try:
    reviews = torch.load("data/reviews")
except:    
    path = 'aclImdb/train/unsup/'
    reviews = load_imdb_data(path, gen=False)
    torch.save(reviews, "data/reviews")

In [4]:
try:
    reviews_40 = torch.load("data/reviews_40")
except:
    reviews_40 = load_imdb_data(path, gen=True)
    torch.save(reviews_40, "data/reviews_40")

In [5]:
try:
    vocab, word2id, id2word = torch.load("data/vocab_idxs")
except:
    vocab, word2id, id2word = vocab_idxs(reviews) 
    torch.save([vocab, word2id, id2word], "data/vocab_idxs")
    

In [6]:
try:
    matrix = torch.load("data/matrix")
except:    
    matrix = sents2matrix(reviews_40, word2id)
    torch.save(matrix, "data/matrix")

### Generator Model

In [16]:
# create Tensor datasets
#train_data = TensorDataset(torch.LongTensor(matrix))

# dataloaders


# make sure the SHUFFLE your training data
#train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)

In [4]:
hidden_dim = 256
vocab_size = len(vocab)
embedding_dim = 110
p = 0.5
n_layers = 1
device = torch.device("cuda")

In [7]:
from models.generator import *

decoder =  AttnMaskedDecoderRNN(hidden_dim, vocab_size, dropout_p=0.2, n_layers=n_layers, max_length=41, device=device).to(device)
encoder = MaskedEncoderRNN(hidden_dim, vocab_size, embedding_dim, device=device, p=p, n_layers=n_layers).to(device)

In [None]:
## load weight for encoder
weights = dict()
weights = torch.load("weights_1_layer")
for layer in encoder.state_dict():
    if layer in weights:
        encoder.state_dict()[layer] = weights[layer]

In [13]:
def plot_history(train_history, title='loss'):
    plt.figure()
    plt.title('{}'.format(title))
    plt.plot(train_history, label='train', zorder=1)    
    plt.xlabel('train steps')
    plt.legend(loc='best')
    plt.grid()
    plt.show()

In [18]:
def trainIters(encoder, decoder, n_epochs, learning_rate=0.01, save_to_disk=True):
    start = time.time()
    train_log = []

    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)
    
    encoder.train()
    decoder.train()

    for epoch in range(n_epochs):
        train_loss = train_epoch(encoder, decoder, encoder_optimizer, decoder_optimizer, train_loader)
        train_log.extend(train_loss)
        
        clear_output()
        print ('Epoch [{}/{}], Loss: {:.4f}' 
                .format(epoch+1, n_epochs, np.mean(train_log[-100:])))
        plot_history(train_log)
        
    if save_to_disk:
        torch.save(model, 'generator.pt')
        
def train_epoch(encoder, decoder, encoder_optimizer, decoder_optimizer, train_loader):
    loss_log = []
    criterion = nn.NLLLoss()

    index = 0
    for sequence in train_loader:
        index += 1
        input = sequence[0].to(encoder.device)
        output = input
        loss = train(input, output, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)
        loss_log.append(loss.item())
        if index % 100:
            clear_output(True)
            print("mean error : ", np.mean(loss_log[-100:]))

    return loss_log

def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion):
    
    # encoder part
    input_length = input_tensor.size(0)
    encoder_hidden = encoder.init_hidden(input_length)

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
 
    encoder_output, encoder_hidden, mask = encoder(input_tensor, encoder_hidden)
    #decoder part
    
    decoder_input = torch.ones(input_length, 1).to(decoder.device).long()
    
    decoder_output, decoder_hidden, _ = decoder(decoder_input, encoder_hidden, encoder_output)

    loss = criterion(
        decoder_output[mask[:, 0].byte()].squeeze(1),
        input_tensor[:, 0][mask[:, 0].byte()]
    )
    
    for char_index in range(input_tensor.shape[1] - 1):
        decoder_output = torch.argmax(decoder_output, dim=2)
        decoder_output, decoder_hidden, _ = decoder(decoder_output, decoder_hidden, encoder_output)
        loss = criterion(
            decoder_output[mask[:, char_index + 1].byte()].squeeze(1),
            input_tensor[:, char_index + 1][mask[:, char_index + 1].byte()]
        )

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss / input_length


### Learning

In [22]:
trainIters(encoder, decoder, n_epochs=10, learning_rate=0.0001)

mean error :  0.13085287623107433


KeyboardInterrupt: 