In [1]:
from data_2c_generators import get_iterator, get_dataset
from classifiers import theme_classifier, personal_classifier

In [2]:
import torch
import torch.nn as nn

In [3]:
from torchtext.vocab import GloVe 
GLOVE_EMBEDDING = GloVe(name="6B", dim=300)

In [4]:
(train_dataset, val_dataset, test_dataset, \
 review_text_FIELD, theme_FIELD, perspective_FIELD) = get_dataset(vectors = GLOVE_EMBEDDING)

In [5]:
batch_size = 20
train_iter = get_iterator(train_dataset, batch_size, train=True, shuffle=True, repeat=False)
val_iter = get_iterator(val_dataset, batch_size, train=False, shuffle=True, repeat=False)
test_iter = get_iterator(test_dataset, batch_size, train=False, shuffle=True, repeat=False)

In [6]:
val_list = list(val_iter)

In [7]:
batch = val_list[500]
x = batch.review_text.transpose(1, 0).int()[:10]
y = batch.theme.int()
z = batch.perspective.int()

for idx in range(x.shape[0]):
    #print(x.shape, y.shape)
    print("{} | {} | {}".format(' '.join([review_text_FIELD.vocab.itos[_] for _ in x[idx]]),
                                          theme_FIELD.vocab.itos[y[idx]],
                                          perspective_FIELD.vocab.itos[z[idx]]))

<sos> their scenes are so slow . <eos> | plot | False
<sos> five stars , do n't miss <eos> | other | False
<sos> great cast , poorly written directed <eos> | acting | False
<sos> great dual protagonist antagonist story . <eos> | plot | False
<sos> boring for the first two hours <eos> | other | False
<sos> trailer was better than movie . <eos> | effect | False
<sos> i liked the whole movie . <eos> | other | True
<sos> touch my heart so amazing <unk> <eos> | other | True
<sos> i say check it out . <eos> | other | True
<sos> i give it a b . <eos> | other | True


In [8]:
#review_text_FIELD.vocab.vectors.shape, len(review_text_FIELD.vocab.itos)

In [9]:
print(theme_FIELD.vocab.itos)
print(perspective_FIELD.vocab.itos)

['<unk>', 'other', 'plot', 'acting', 'effect', 'production']
['<unk>', 'False', 'True']


In [10]:
vocab_size = review_text_FIELD.vocab.vectors.shape[0]
label_0_size = len(theme_FIELD.vocab) - 1
label_1_size = len(perspective_FIELD.vocab) - 1
emb_dim = review_text_FIELD.vocab.vectors.shape[1]
vectors = train_dataset.fields["review_text"].vocab.vectors
hidden_dim = 1024
layers = 2
dropout = .5

vocab_size, label_0_size, label_1_size, emb_dim, vectors.shape

(12304, 5, 2, 300, torch.Size([12304, 300]))

In [11]:
class BaseModel(nn.Module):

    def __init__(self, 
                 ninp = vocab_size, 
                 linps = [label_0_size, label_1_size], 
                 emb_dim = emb_dim, 
                 emb_lab = 20,
                 nhid = hidden_dim, 
                 nout = vocab_size, 
                 nlayers = layers, 
                 dropout = dropout, 
                 vectors = vectors,
                 pretrained = False):
        super().__init__()
        
        self.ninp = ninp
        self.linp_offsets = [0] + linps
        self.linp = sum(linps)
        self.emb_dim = emb_dim
        self.emb_lab = emb_lab
        self.nhid = nhid
        self.nout = nout
        self.nlayers = nlayers
        self.drop = nn.Dropout(dropout)

        self.word_embedding = nn.Embedding(ninp, emb_dim)
        self.label_embedding = nn.Embedding(self.linp, emb_lab)
        
        self.rnn = nn.LSTM(emb_dim + emb_lab*len(linps), nhid, nlayers, dropout=dropout)
        self.rnn.flatten_parameters()
        self.decoder = nn.Linear(nhid, nout)

        if pretrained:
            self.word_embedding.weight.data.copy_(vectors)
            self.word_embedding.from_pretrained(GLOVE_EMBEDDING.vectors)
            
        self.init_weights()    
            
    def init_weights(self):
        initrange = .1
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, reviews, labels_list, hidden):
        X = self.word_embedding(reviews)

        for i, labels in enumerate(labels_list):
            labels = self.linp_offsets[i] + labels
            L = self.label_embedding(labels)
            L = torch.cat([L.unsqueeze(0)]*X.shape[0])
            X = torch.cat([X, L], -1)
            
        X = self.drop(X)
        X, hidden = self.rnn(X, hidden)
        X = self.drop(X)
        
        X = self.decoder(X)
        return X, hidden
      
    

In [12]:
def repackage_hidden(h):
    if isinstance(h, torch.Tensor):
        return h.detach()
    else:
        return tuple(repackage_hidden(v) for v in h)

In [13]:
import numpy as np
import gc

In [14]:
def evaluate(model, data_source, criterion):
    model.eval()
    total_loss_e = 0
    total_number_of_words = 0

    with torch.no_grad():
        for i, batch in enumerate(data_source):
            labels_0 = batch.theme.cuda().long() - 1
            labels_1 = batch.perspective.cuda().long() - 1
            labels = (labels_0, labels_1)
            batch = batch.review_text.cuda().long()
            hidden = None
            if batch.shape[0] > 3:
                data, targets = batch[1:-1,:], batch[2:,:]
                
                output, hidden = model(data, labels, hidden)
                output_flat = output.contiguous().view(-1, vocab_size)
                target_flat = targets.contiguous().view(-1)
                #print(data.shape, output_flat.shape, target_flat.shape)
                #0/0
                batch_loss = criterion(output_flat, target_flat).detach().item()

                number_of_words = data.shape[0] * data.shape[1]
                total_loss_e += batch_loss * number_of_words
                total_number_of_words += number_of_words

                #hidden = 
                repackage_hidden(hidden)
                #print(total_loss_e / total_number_of_words)
            
    return total_loss_e / total_number_of_words

In [15]:
def train(model, ep0, epN, train_iter, dev_iter, optimizer, criterion, 
          max_grad_norm, model_name, best_ppl = float('inf')):
    
    best_ppl = best_ppl
    
    len_train_iter = len(train_iter)
    for epoch in range(ep0, epN):
        model.train()
        total_loss_e = 0
        total_number_of_words = 0 
        
        for i, batch in enumerate(train_iter):

            labels_0 = batch.theme.cuda().long() - 1
            labels_1 = batch.perspective.cuda().long() - 1
            labels = (labels_0, labels_1)
            batch = batch.review_text.cuda().long()
            hidden = None
            if batch.shape[0] > 3:
                data, targets = batch[1:-1,:], batch[2:,:]
                
                
                output, hidden = model(data, labels, hidden)
                #hidden = 
                repackage_hidden(hidden)

                output_flat = output.contiguous().view(-1, vocab_size)
                target_flat = targets.contiguous().view(-1)
                #print(data.shape, output_flat.shape, target_flat.shape)
                #0/0
                batch_loss = criterion(output_flat, target_flat)
                
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
                optimizer.zero_grad()
                batch_loss.backward()
                optimizer.step()

                number_of_words = data.shape[0] * data.shape[1]
                total_loss_e += batch_loss.detach().item() * number_of_words
                total_number_of_words += number_of_words
            
                
                if i % 500 == 0:
                    cur_loss = batch_loss.detach().item() #np.mean(total_loss_e)#
                    tr_ppl_print = np.exp(cur_loss)
                    print("| epoch {:3d} | batch {} / {} | train_loss {} | train_ppl {}".format(
                            epoch, i, len_train_iter, 
                            np.round(cur_loss, 3), np.round(tr_ppl_print, 3)))

                #gc.collect()
                #torch.cuda.empty_cache()
                
                if i % 4999 == 1: #len_train_iter - 1:
                    cur_loss = batch_loss.detach().item() #np.mean(total_loss_e) #
                    tr_ppl_print = np.exp(cur_loss)
                    gc.collect()
                    #torch.cuda.empty_cache()
                    val_loss_eval = evaluate(model, dev_iter, criterion)
                    val_ppl_print = np.exp(val_loss_eval)
                    
                    template = "| epoch {:3d} | batch {} / {} | train_loss {} | train_ppl {} | val_loss {} | val_ppl {}"
                    print(template.format(
                            epoch, i, len_train_iter, 
                            np.round(cur_loss, 3), np.round(tr_ppl_print, 3), 
                            np.round(val_loss_eval, 3), np.round(val_ppl_print, 3)))

                    if val_ppl_print < best_ppl :
                        print('old best ppl {} new best ppl {}'.format(best_ppl, val_ppl_print))
                        best_ppl = val_ppl_print
                        best_model_name = '{}{}.model'.format(model_name, best_ppl)
                        print('save model...', best_model_name)
                        with open(best_model_name, 'wb') as file:
                            torch.save(model, file) 

                    gc.collect()
                    model.train()
                    
                if i == 40000: break


In [45]:
with open('./baseline_2c/best_model_81.86675819854015.model', 'rb') as file:
    model = torch.load(file) #BaseModel().cuda()
    model.eval()

In [46]:
#with open('best_model_name', 'rb') as file:
#    model = torch.load(file) 

In [47]:
#model = BaseModel(pretrained=True).cuda()

In [48]:
model

BaseModel(
  (drop): Dropout(p=0.5)
  (word_embedding): Embedding(12304, 300)
  (label_embedding): Embedding(7, 20)
  (rnn): LSTM(340, 1024, num_layers=2, dropout=0.5)
  (decoder): Linear(in_features=1024, out_features=12304, bias=True)
)

In [49]:
# implement PPL
#learning_rate = 0.001
criterion = nn.CrossEntropyLoss(reduction='mean',
                       ignore_index=train_dataset.fields["review_text"].vocab.stoi['<pad>']).cuda()
#optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) #, betas=(.999, .9999))

learning_rate = 0.00001
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, betas=(.99999, .99999))

In [50]:
train(model,
      ep0 = 1,
      epN = 2,
      train_iter = train_iter,
      dev_iter = val_iter,
      optimizer = optimizer,
      criterion = criterion,
      max_grad_norm = 10,
      model_name = 'baseline_2c/best_model_',
      best_ppl = 81.867)

| epoch   1 | batch 500 / 35138 | train_loss 4.498 | train_ppl 89.805
| epoch   1 | batch 1000 / 35138 | train_loss 3.521 | train_ppl 33.81
| epoch   1 | batch 1500 / 35138 | train_loss 2.979 | train_ppl 19.669
| epoch   1 | batch 2000 / 35138 | train_loss 3.449 | train_ppl 31.456
| epoch   1 | batch 2500 / 35138 | train_loss 3.769 | train_ppl 43.343
| epoch   1 | batch 3000 / 35138 | train_loss 3.751 | train_ppl 42.573
| epoch   1 | batch 3500 / 35138 | train_loss 4.294 | train_ppl 73.243
| epoch   1 | batch 4000 / 35138 | train_loss 3.65 | train_ppl 38.469
| epoch   1 | batch 4500 / 35138 | train_loss 4.533 | train_ppl 93.074
| epoch   1 | batch 5000 / 35138 | train_loss 4.62 | train_ppl 101.533
| epoch   1 | batch 5000 / 35138 | train_loss 4.62 | train_ppl 101.533 | val_loss 4.868 | val_ppl 130.05
| epoch   1 | batch 5500 / 35138 | train_loss 3.846 | train_ppl 46.809
| epoch   1 | batch 6000 / 35138 | train_loss 4.178 | train_ppl 65.244
| epoch   1 | batch 6500 / 35138 | train_loss 

  "type " + obj.__name__ + ". It won't be checked "


| epoch   1 | batch 10000 / 35138 | train_loss 3.95 | train_ppl 51.942
| epoch   1 | batch 10500 / 35138 | train_loss 4.352 | train_ppl 77.624
| epoch   1 | batch 11000 / 35138 | train_loss 4.071 | train_ppl 58.62
| epoch   1 | batch 11500 / 35138 | train_loss 3.606 | train_ppl 36.831
| epoch   1 | batch 12000 / 35138 | train_loss 3.551 | train_ppl 34.862
| epoch   1 | batch 12500 / 35138 | train_loss 3.733 | train_ppl 41.811
| epoch   1 | batch 13000 / 35138 | train_loss 3.753 | train_ppl 42.633
| epoch   1 | batch 13500 / 35138 | train_loss 4.3 | train_ppl 73.663
| epoch   1 | batch 14000 / 35138 | train_loss 4.145 | train_ppl 63.098
| epoch   1 | batch 14500 / 35138 | train_loss 4.432 | train_ppl 84.061
| epoch   1 | batch 14998 / 35138 | train_loss 4.272 | train_ppl 71.664 | val_loss 4.281 | val_ppl 72.319
old best ppl 75.7635141129965 new best ppl 72.31925011074134
save model... baseline_2c/best_model_72.31925011074134.model
| epoch   1 | batch 15000 / 35138 | train_loss 4.105 | t

| epoch   0 | batch 500 / 35138 | train_loss 0.781 | train_ppl 2.184
| epoch   0 | batch 1000 / 35138 | train_loss 2.178 | train_ppl 8.825
| epoch   0 | batch 1500 / 35138 | train_loss 1.832 | train_ppl 6.243
| epoch   0 | batch 2000 / 35138 | train_loss 3.167 | train_ppl 23.728
| epoch   0 | batch 2500 / 35138 | train_loss 3.091 | train_ppl 21.994
| epoch   0 | batch 3000 / 35138 | train_loss 3.095 | train_ppl 22.083
| epoch   0 | batch 3500 / 35138 | train_loss 3.804 | train_ppl 44.894
| epoch   0 | batch 4000 / 35138 | train_loss 3.102 | train_ppl 22.244
| epoch   0 | batch 4500 / 35138 | train_loss 3.961 | train_ppl 52.502
| epoch   0 | batch 5000 / 35138 | train_loss 4.203 | train_ppl 66.883
| epoch   0 | batch 5000 / 35138 | train_loss 4.203 | train_ppl 66.883 | val_loss 14.507 | val_ppl 1997565.245
old best ppl inf new best ppl 1997565.244942298
save model... baseline_2c/best_model_1997565.244942298.model


  "type " + obj.__name__ + ". It won't be checked "


| epoch   0 | batch 5500 / 35138 | train_loss 3.643 | train_ppl 38.204
| epoch   0 | batch 6000 / 35138 | train_loss 4.075 | train_ppl 58.827
| epoch   0 | batch 6500 / 35138 | train_loss 3.734 | train_ppl 41.832
| epoch   0 | batch 7000 / 35138 | train_loss 3.737 | train_ppl 41.967
| epoch   0 | batch 7500 / 35138 | train_loss 3.668 | train_ppl 39.154
| epoch   0 | batch 8000 / 35138 | train_loss 3.4 | train_ppl 29.952
| epoch   0 | batch 8500 / 35138 | train_loss 3.912 | train_ppl 49.985
| epoch   0 | batch 9000 / 35138 | train_loss 4.404 | train_ppl 81.806
| epoch   0 | batch 9500 / 35138 | train_loss 4.243 | train_ppl 69.609
| epoch   0 | batch 9999 / 35138 | train_loss 3.69 | train_ppl 40.028 | val_loss 10.02 | val_ppl 22477.823
old best ppl 1997565.244942298 new best ppl 22477.823373549785
save model... baseline_2c/best_model_22477.823373549785.model
| epoch   0 | batch 10000 / 35138 | train_loss 4.142 | train_ppl 62.928
| epoch   0 | batch 10500 / 35138 | train_loss 4.223 | trai

KeyboardInterrupt: 

| epoch   1 | batch 500 / 35138 | train_loss 4.324 | train_ppl 75.514
| epoch   1 | batch 1000 / 35138 | train_loss 3.384 | train_ppl 29.496
| epoch   1 | batch 1500 / 35138 | train_loss 2.856 | train_ppl 17.388
| epoch   1 | batch 2000 / 35138 | train_loss 3.665 | train_ppl 39.054
| epoch   1 | batch 2500 / 35138 | train_loss 3.718 | train_ppl 41.2
| epoch   1 | batch 3000 / 35138 | train_loss 3.568 | train_ppl 35.443
| epoch   1 | batch 3500 / 35138 | train_loss 4.33 | train_ppl 75.971
| epoch   1 | batch 4000 / 35138 | train_loss 3.653 | train_ppl 38.575
| epoch   1 | batch 4500 / 35138 | train_loss 4.714 | train_ppl 111.483
| epoch   1 | batch 5000 / 35138 | train_loss 4.523 | train_ppl 92.069
| epoch   1 | batch 5000 / 35138 | train_loss 4.523 | train_ppl 92.069 | val_loss 4.999 | val_ppl 148.27
| epoch   1 | batch 5500 / 35138 | train_loss 3.746 | train_ppl 42.364
| epoch   1 | batch 6000 / 35138 | train_loss 4.154 | train_ppl 63.685
| epoch   1 | batch 6500 / 35138 | train_loss 

  "type " + obj.__name__ + ". It won't be checked "


| epoch   1 | batch 10000 / 35138 | train_loss 3.968 | train_ppl 52.89
| epoch   1 | batch 10500 / 35138 | train_loss 4.172 | train_ppl 64.869
| epoch   1 | batch 11000 / 35138 | train_loss 4.073 | train_ppl 58.758
| epoch   1 | batch 11500 / 35138 | train_loss 3.799 | train_ppl 44.654
| epoch   1 | batch 12000 / 35138 | train_loss 3.576 | train_ppl 35.74
| epoch   1 | batch 12500 / 35138 | train_loss 3.696 | train_ppl 40.302
| epoch   1 | batch 13000 / 35138 | train_loss 3.823 | train_ppl 45.748
| epoch   1 | batch 13500 / 35138 | train_loss 4.23 | train_ppl 68.741
| epoch   1 | batch 14000 / 35138 | train_loss 4.263 | train_ppl 70.993
| epoch   1 | batch 14500 / 35138 | train_loss 4.444 | train_ppl 85.109
| epoch   1 | batch 14998 / 35138 | train_loss 4.306 | train_ppl 74.143 | val_loss 4.289 | val_ppl 72.862
old best ppl 76.32543191859847 new best ppl 72.86214824252396
save model... best_model_base_model_ppl_72.86214824252396.model
| epoch   1 | batch 15000 / 35138 | train_loss 4.04

| epoch   0 | batch 500 / 35138 | train_loss 0.815 | train_ppl 2.259
| epoch   0 | batch 1000 / 35138 | train_loss 2.182 | train_ppl 8.864
| epoch   0 | batch 1500 / 35138 | train_loss 1.843 | train_ppl 6.317
| epoch   0 | batch 2000 / 35138 | train_loss 3.241 | train_ppl 25.552
| epoch   0 | batch 2500 / 35138 | train_loss 3.152 | train_ppl 23.389
| epoch   0 | batch 3000 / 35138 | train_loss 3.182 | train_ppl 24.102
| epoch   0 | batch 3500 / 35138 | train_loss 3.82 | train_ppl 45.592
| epoch   0 | batch 4000 / 35138 | train_loss 3.089 | train_ppl 21.948
| epoch   0 | batch 4500 / 35138 | train_loss 4.03 | train_ppl 56.244
| epoch   0 | batch 5000 / 35138 | train_loss 4.202 | train_ppl 66.84
| epoch   0 | batch 5000 / 35138 | train_loss 4.202 | train_ppl 66.84 | val_loss 13.078 | val_ppl 478201.324
old best ppl inf new best ppl 478201.3236771168
save model... best_model_base_model_ppl_478201.3236771168.model


  "type " + obj.__name__ + ". It won't be checked "


| epoch   0 | batch 5500 / 35138 | train_loss 3.617 | train_ppl 37.213
| epoch   0 | batch 6000 / 35138 | train_loss 4.088 | train_ppl 59.595
| epoch   0 | batch 6500 / 35138 | train_loss 3.65 | train_ppl 38.475
| epoch   0 | batch 7000 / 35138 | train_loss 3.726 | train_ppl 41.518
| epoch   0 | batch 7500 / 35138 | train_loss 3.786 | train_ppl 44.078
| epoch   0 | batch 8000 / 35138 | train_loss 3.211 | train_ppl 24.793
| epoch   0 | batch 8500 / 35138 | train_loss 3.974 | train_ppl 53.184
| epoch   0 | batch 9000 / 35138 | train_loss 4.268 | train_ppl 71.381
| epoch   0 | batch 9500 / 35138 | train_loss 4.298 | train_ppl 73.516
| epoch   0 | batch 9999 / 35138 | train_loss 3.833 | train_ppl 46.184 | val_loss 10.383 | val_ppl 32308.643
old best ppl 478201.3236771168 new best ppl 32308.64283447535
save model... best_model_base_model_ppl_32308.64283447535.model
| epoch   0 | batch 10000 / 35138 | train_loss 4.086 | train_ppl 59.524
| epoch   0 | batch 10500 / 35138 | train_loss 4.34 | t

In [70]:
evaluate(model, val_list, criterion)

4.212811502530497

In [71]:
evaluate(model, test_iter, criterion)

4.206123341756414

In [72]:
np.exp(4.206123341756414)

67.09592700457719