In [1]:
from data_generators import get_iterator, get_dataset
from classifiers import theme_classifier

In [2]:
import torch
import torch.nn as nn

In [3]:
from torchtext.vocab import GloVe 
GLOVE_EMBEDDING = GloVe(name="6B", dim=300)

In [4]:
train_dataset, val_dataset, test_dataset, review_text_FIELD, theme_FIELD = get_dataset(vectors = 
                                                                                       GLOVE_EMBEDDING)

In [5]:
batch_size = 32
train_iter = get_iterator(train_dataset, batch_size, train=True, shuffle=False, repeat=False)
val_iter = get_iterator(val_dataset, batch_size, train=False, shuffle=False, repeat=False)
test_iter = get_iterator(test_dataset, batch_size, train=False, shuffle=False, repeat=False)

In [6]:
list_train = list(val_iter)

In [7]:
batch = list_train[600]
x = batch.review_text.transpose(1, 0).int()[:10]
y = batch.theme.int()

for idx in range(x.shape[0]):
    #print(x.shape, y.shape)
    print("{} | {}".format(' '.join([train_dataset.fields['review_text'].vocab.itos[_] for _ in x[idx]]),
         train_dataset.fields['theme'].vocab.itos[y[idx]]))

as usual sad true story ... but great acting | plot
anything with jennifer lawrence is worth your time . | other
you 'll find them in `` dirty grandpa . | other
no direction , worthwhile plot , or acting . | plot
although be warned there are some gory scenes . | plot
one of the best musical movie for me . | other
so disappointed in jake gyllenhall for this one . | other
it takes much more than a balance to walk | other
that highlights the greatest of a true thriller . | other
overall , though , it left me cold . | other


In [8]:
#review_text_FIELD.vocab.vectors.shape, len(review_text_FIELD.vocab.itos)

In [9]:
theme_FIELD.vocab.itos

['<unk>', 'other', 'plot', 'acting', 'effect', 'production']

In [10]:
vocab_size = review_text_FIELD.vocab.vectors.shape[0]
label_size = len(theme_FIELD.vocab) - 1
emb_dim = review_text_FIELD.vocab.vectors.shape[1]
vectors = train_dataset.fields["review_text"].vocab.vectors
hidden_dim = 500
layers = 2
dropout = .2

label_size

5

In [11]:
class BaseModel(nn.Module):

    def __init__(self, 
                 ninp = vocab_size, 
                 linp = label_size, 
                 emb_dim = emb_dim, 
                 emb_lab = 20,
                 nhid = hidden_dim, 
                 nout = vocab_size, 
                 nlayers = layers, 
                 dropout = dropout, 
                 vectors = vectors,
                 pretrained = False):
        super().__init__()
        
        self.ninp = ninp
        self.linp = linp
        self.emb_dim = emb_dim
        self.emb_lab = emb_lab
        self.nhid = nhid
        self.nout = nout
        self.nlayers = nlayers
        self.drop = nn.Dropout(dropout)

        self.word_embedding = nn.Embedding(ninp, emb_dim)
        self.label_embedding = nn.Embedding(linp, emb_lab)
        
        self.rnn = nn.LSTM(emb_dim + emb_lab, nhid, nlayers, dropout=dropout)
        self.rnn.flatten_parameters()
        self.decoder = nn.Linear(nhid, nout)
        self.softmax = nn.LogSoftmax(dim=-1)

        if pretrained:
            self.encoder.weight.data = vectors
            
    def init_weights(self):
        initrange = .1
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, reviews, labels, hidden):
        R = self.word_embedding(reviews)
        L = self.label_embedding(labels)
        L = torch.cat([L.unsqueeze(0)]*R.shape[0])
        X = torch.cat([R, L], -1)
        
        X, hidden = self.rnn(X, hidden)
        X = X.view(X.size(0)*X.size(1), X.size(2))
        
        X = self.decoder(X)
        log_probs = self.softmax(X)
        return log_probs, hidden
      

In [12]:
def repackage_hidden(h):
    """Wraps hidden states in new Tensors, to detach them from their history."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    else:
        return tuple(repackage_hidden(v) for v in h)

In [13]:
model = BaseModel().cuda()

In [14]:
# implement PPL
learning_rate = 0.001
criterion = nn.NLLLoss(reduction='sum', 
                       ignore_index=train_dataset.fields["review_text"].vocab.stoi['<pad>']).cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [15]:
def evaluate(data_source):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss_e = 0
    total_number_of_words = 0

    with torch.no_grad():
        for i, batch in enumerate(data_source):
            labels = batch.theme.cuda().long() - 1
            batch = batch.review_text.cuda().long()
            hidden = None
            if batch.shape[0] > 1:
                data, targets = batch[:-1,:], batch[1:,:]
                number_of_words = data.shape[0]*data.shape[1]
                
                output, hidden = model(data, labels, hidden)
                output_flat = output.contiguous().view(-1, vocab_size)

                total_loss_e += criterion(output_flat, targets.contiguous().view(-1)).data.float()
                total_number_of_words += number_of_words
                hidden = repackage_hidden(hidden)
            
    return total_loss_e.item() / total_number_of_words

In [18]:
import numpy as np

In [42]:
def train(ep0, epN, train_iter, dev_iter, optimizer, criterion, 
          max_grad_norm, model_name, best_ppl = float('inf')):
    
    best_ppl = best_ppl
    
    len_train_iter = len(train_iter)
    for epoch in range(ep0, epN):
        total_loss_e = 0
        total_number_of_words = 0 
        
        for i, batch in enumerate(train_iter):

            model.zero_grad()
            labels = batch.theme.cuda().long() - 1
            batch = batch.review_text.cuda().long()
            hidden = None
            if batch.shape[0] > 1:
                model.train()


                data, targets = batch[:-1,:], batch[1:,:]
                number_of_words = data.shape[0]*data.shape[1]
                
                output, hidden = model(data, labels, hidden)
                output_flat = output.contiguous().view(-1, vocab_size)

                epoch_loss = criterion(output_flat, targets.contiguous().view(-1))
                total_loss_e += epoch_loss.data.float()
                total_number_of_words += number_of_words
                hidden = repackage_hidden(hidden)
            
                torch.nn.utils.clip_grad_norm_(model.parameters(),max_grad_norm)
                optimizer.zero_grad()
                epoch_loss.backward()
                optimizer.step()

                cur_loss = total_loss_e.item() / total_number_of_words
                tr_ppl_print = np.exp(cur_loss)
                val_loss_eval = evaluate(dev_iter)
                val_ppl_print = np.exp(val_loss_eval)

                print("| epoch {:3d} | batch {} / {} | train_loss {} | train_ppl {} | val_loss {} | val_ppl {}".format(
                        epoch, i, len_train_iter, 
                        np.round(cur_loss, 3), np.round(tr_ppl_print, 3), 
                        np.round(val_loss_eval, 3), np.round(val_ppl_print, 3)))

                if val_ppl_print < best_ppl :
                    print('old best ppl {} new best ppl {}'.format(best_ppl, val_ppl_print))
                    best_ppl = val_ppl_print
                    best_model_name = 'best_model_{}_{}.model'.format(model_name, best_ppl)
                    print('save model...', best_model_name)
                    with open(best_model_name, 'wb') as file:
                        torch.save(model, file) 
            else:
                print(i, batch.shape)

In [43]:
train(ep0 = 0,
      epN = 1,
      train_iter = train_iter,
      dev_iter = val_iter,
      optimizer = optimizer,
      criterion = criterion,
      max_grad_norm = 5,
      model_name = 'base_model_ppl',
      best_ppl = float('inf'))

0 torch.Size([0, 32])
1 torch.Size([0, 32])
2 torch.Size([0, 32])
3 torch.Size([0, 32])
4 torch.Size([0, 32])
5 torch.Size([0, 32])
6 torch.Size([0, 32])
7 torch.Size([0, 32])
8 torch.Size([0, 32])
9 torch.Size([0, 32])
10 torch.Size([0, 32])
11 torch.Size([0, 32])
12 torch.Size([0, 32])
13 torch.Size([0, 32])
14 torch.Size([0, 32])
15 torch.Size([0, 32])
16 torch.Size([0, 32])
17 torch.Size([1, 32])
18 torch.Size([1, 32])
19 torch.Size([1, 32])
20 torch.Size([1, 32])
21 torch.Size([1, 32])
22 torch.Size([1, 32])
23 torch.Size([1, 32])
24 torch.Size([1, 32])
25 torch.Size([1, 32])
26 torch.Size([1, 32])
27 torch.Size([1, 32])
28 torch.Size([1, 32])
29 torch.Size([1, 32])
30 torch.Size([1, 32])
31 torch.Size([1, 32])
32 torch.Size([1, 32])
33 torch.Size([1, 32])
34 torch.Size([1, 32])
35 torch.Size([1, 32])
36 torch.Size([1, 32])
37 torch.Size([1, 32])
38 torch.Size([1, 32])
39 torch.Size([1, 32])
40 torch.Size([1, 32])
41 torch.Size([1, 32])
42 torch.Size([1, 32])
43 torch.Size([1, 32]

  "type " + obj.__name__ + ". It won't be checked "


| epoch   0 | batch 131 / 21962 | train_loss 9.6 | train_ppl 14762.478 | val_loss 10.192 | val_ppl 26679.652
old best ppl 27197.163098554924 new best ppl 26679.652228685365
save model... best_model_base_model_ppl_26679.652228685365.model
| epoch   0 | batch 132 / 21962 | train_loss 9.756 | train_ppl 17262.839 | val_loss 10.168 | val_ppl 26060.512
old best ppl 26679.652228685365 new best ppl 26060.512268175233
save model... best_model_base_model_ppl_26060.512268175233.model
| epoch   0 | batch 133 / 21962 | train_loss 9.801 | train_ppl 18045.464 | val_loss 10.14 | val_ppl 25343.971
old best ppl 26060.512268175233 new best ppl 25343.971109642982
save model... best_model_base_model_ppl_25343.971109642982.model


KeyboardInterrupt: 

In [None]:
# Add <sos> and <eos>

In [59]:
vocab_size

30002

In [60]:
np.exp(evaluate(val_iter))

30083.068177308916