In [1]:
from utils import get_data, Dataset, index2Sent, collate_fn_train, collate_fn_test

## Load Data

In [2]:
# Location of the news-summary CSV; passed to get_data() when the corpus is loaded.
DATA_PATH = "data/news_summary.csv"

In [3]:
# Vocabulary budget handed to get_data(); VOCAB_SIZE is later derived as NUM_WORDS + 4.
NUM_WORDS = 10000
# Length limits handed to Dataset for the article text and for the summary
# (presumably counted in tokens -- TODO confirm in utils.Dataset).
MAX_TEXT_LEN = 500
MAX_SUM_LEN = 100

In [4]:
# Load the corpus together with two lookup tables. Judging by how they are used
# further down, w2i is indexed by token string (w2i["<pad>"]) and i2w by integer
# id (i2w[31]) -- TODO confirm against utils.get_data.
data, w2i, i2w = get_data(DATA_PATH, NUM_WORDS)

Length of the data: 4514
Length of the data after dropping nan: 4396


In [5]:
# Wrap the loaded data for training, using the word->id table and the two length
# limits. isTrain=True presumably makes each item carry the target summary as
# well -- TODO confirm in utils.Dataset.
dataset = Dataset(data, w2i, MAX_TEXT_LEN, MAX_SUM_LEN, isTrain=True)

# Train

In [6]:
from Seq2Seq import Seq2Seq
import torch
import torch.nn as nn

In [7]:
# Model / training hyper-parameters.
# The 4 extra ids on top of NUM_WORDS are presumably the special tokens
# (<sos>, <eos>, <unk>, <pad>) -- TODO confirm against utils.get_data.
VOCAB_SIZE = NUM_WORDS + 4
EMBEDDING_DIM = 50
HIDDEN_DIM = 128
BATCH_SIZE = 12
# Prefer the GPU, but fall back to the CPU so the notebook still runs on a
# machine without CUDA instead of failing at the first `.to(DEVICE)`.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

In [8]:
# Shuffled mini-batches for training. collate_fn_train assembles each batch;
# the training loop unpacks a batch as ((x, xlens), (y, ylens)).
dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=8,
    collate_fn=collate_fn_train,
)

In [9]:
# Keep the loss unreduced (one value per target position) so that padded
# positions can be masked out before averaging in the training loop.
criterion = nn.CrossEntropyLoss(reduction='none')

# Build the network on the chosen device, then hand its parameters to Adam.
model = Seq2Seq(VOCAB_SIZE, EMBEDDING_DIM, HIDDEN_DIM).to(DEVICE)
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

In [11]:
# Train for 10 epochs with teacher forcing and a padding-masked cross-entropy.
#
# The epoch counter is named `epoch` and the per-sample index `row`. They used to
# share the name `i`, so the masking loop overwrote the epoch counter and the
# end-of-epoch report printed the last sample index + 1 instead of the epoch.
# The last batch's x, xlens, y, ylens and output stay in the namespace so they
# can be inspected after the loop.
for epoch in range(10):
    total_loss = 0.
    for batch_num, ((x, xlens), (y, ylens)) in enumerate(dataloader):

        # setup tensors: token ids as int64 on the training device
        x = x.long().to(DEVICE)
        y = y.long().to(DEVICE)

        # clear previous gradients
        optimizer.zero_grad()

        # generate predictions
        # output: (BATCH_SIZE, time_steps, NUM_WORDS)
        output = model(x, xlens, y)

        ### Calculate Loss
        # 1. y must be shifted left by 1 for the loss calculation, since the
        #    outputs should not contain <sos>. The freed last column is filled
        #    with <pad>; it is created directly on DEVICE to avoid a host->device
        #    copy on every step.
        pad_column = torch.ones((y.size(0), 1), dtype=torch.long, device=DEVICE) * w2i["<pad>"]
        y_true = torch.cat([y[:, 1:], pad_column], dim=-1)

        # 2. Output shape for the loss calculation must be of the form
        #    (BATCH_SIZE, NUM_WORDS, *). Refer to the PyTorch docs for details.
        #    criterion uses reduction='none', so `loss` holds one value per
        #    (sample, time step).
        loss = criterion(output.permute(0, 2, 1), y_true)

        # 3. Mask the loss. Needed since padded positions must not contribute.
        # Can avoid if using pack_padded sequence?
        num_tokens = 0
        for row, yl in enumerate(ylens):
            loss[row, yl - 1:] *= 0  # yl-1 to remove <sos>
            num_tokens += yl - 1

        # 4. SUM the losses, divide by the number of real tokens, then backprop
        loss = loss.sum() / num_tokens
        loss.backward()

        # Clip the global gradient norm to 2 before the update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 2)
        # Adjust parameters
        optimizer.step()

        # Read the scalar once; it is used for both the log line and the epoch mean.
        batch_loss = loss.item()
        if (batch_num + 1) % 10 == 0:
            print("Step: {} Loss: {}".format(batch_num + 1, batch_loss))
        total_loss += batch_loss
    print("EP: {} Loss: {}".format(epoch + 1, total_loss / len(dataloader)))

torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 459, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
Step: 10 Loss: 9.107857704162598
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 453, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 394, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
Step: 20 Loss: 9.115081787109375
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 359, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
Step: 30 Loss: 9.073039054870605
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 5

torch.Size([12, 497, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
Step: 290 Loss: 9.105213165283203
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 485, 50])
torch.Size([12, 500, 50])
torch.Size([12, 481, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
Step: 300 Loss: 9.080850601196289
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 497, 50])
torch.Size([12, 500, 50])
torch.Size([12, 495, 50])
torch.Size([12, 500, 50])
Step: 310 Loss: 9.105217933654785
torch.Size([12, 448, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 370, 50])
torch.Size([12, 500, 50])
torch.Size([12

torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
Step: 200 Loss: 9.096546173095703
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
Step: 210 Loss: 9.1103515625
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 408, 50])
torch.Size([12, 500, 50])
Step: 220 Loss: 9.095417022705078
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500, 50])
torch.Size([12, 500

KeyboardInterrupt: 

In [24]:
# First entry of xlens from the last batch the training loop processed
# (presumably the unpadded source length, since it is used as a slice bound below).
xlens[0]

188

In [12]:
# Position within the last batch of the sample that the following cells inspect.
i = 0

In [26]:
# Source token ids of sample i, cut at the length recorded in xlens.
print(x[i, :xlens[i]])

tensor([10000,   635,   132,  1430, 10002,  4845,     6,  9918,    12,  1237,
         3116,   167,     5,  5182,  2051,    43, 10002,   212, 10002,    11,
          566,    57,     0,    77,     1,   262,    43,   346,    51,  6644,
          105,  1143,  1841,   100, 10002,    15,   886,    12,    69, 10002,
            4,     0,  3522,   860,    43, 10002,     0,   141,   567,     8,
           42,    11,  3640,  5293, 10002,   103,  9803,   742,    11,    38,
         1885, 10002,     4, 10002,    33,    22,  1352,     5,  9214,    33,
           22,    28,  4883,    58,     2,   494,   126,   886,  1993,    10,
         2040,     1,     0, 10002,    33, 10002,     3,   174,    54,  7994,
          151,     0, 10002,   141,   314,    24,    22,     5,  5420,  8227,
            4,    39,  7468,    18,     0,  8323,    42,   753,  1881,     9,
          783,  1257,     4,    72,  1800,    80,     5,   197,     8,    11,
            5, 10002, 10002,   439,    22,     5, 10002, 10002, 

In [13]:
# Target (summary) token ids of sample i, cut at the length recorded in ylens.
print(y[i, :ylens[i]])

tensor([10000,   473,  1143,  1841,   567,     8,    42,    11,    28,  4883,
           58,     2,   494,   126,   886,  1993,    10,  2040,     1,     0,
        10002,   737,     6, 10002,  1143,    16,     8,    42,    11,  1352,
           20,     5,   351,     3,   287,     8, 10002,     4,   596,   439,
           22,     5, 10002, 10002,    42,   753, 10002,   159,    29,    25,
         1450,     1,    19,  1438,  2356,   312,  1095,   547,    47, 10002,
        10001], device='cuda:0')


In [27]:
# Turn the source ids of sample i back into text via the i2w table.
index2Sent(x[i, :xlens[i]].cpu().numpy(), i2w)

'<sos> ever since kangana <unk> remark on koffee with karan sparked off a nationwide debate about <unk> every <unk> has something or the other to say about it. when mid-day asked priyanka chopra what <unk> as someone with no <unk> in the industry, felt about <unk> the actor revealed that she has personally experienced <unk> said, "every star has their individual <unk> in <unk> i have faced a lot. i have been kicked out of films because someone else was recommended to the <unk> i <unk> and got over it." however, the <unk> actor does not have a bitter taste in her mouth from the experience. she added, "there is nothing wrong in being born into a family that has a <unk> <unk> don\'t have a <unk> <unk> star kids have <unk> pressure of living up to the family <unk> <unk> those who are meant to be success stories become that, despite all <unk> the work front, priyanka is gearing up for her big hollywood debut, <unk> she will be seen playing the <unk> victoria <unk> in the film, that also sta

In [19]:
# Turn the reference-summary ids of sample i back into text via the i2w table.
index2Sent(y[i, :ylens[i]].cpu().numpy(), i2w)

"<sos> actress priyanka chopra revealed that she has been kicked out of films because someone else was recommended to the <unk> speaking on <unk> priyanka said that she has faced it a lot and added that <unk> in bollywood don't have a <unk> <unk> she added, <unk> those who are meant to be success stories become that, despite all <unk> <eos>"

In [16]:
# Greedy prediction for sample i: take the highest-scoring id along the last
# dimension of the last batch's output and turn the ids back into text.
index2Sent(output.argmax(dim=-1)[i].cpu().numpy(), i2w)

'<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk>'

In [17]:
# Highest-scoring ids for the last sample of the batch, shown as raw ids.
output.argmax(dim=-1)[-1]

tensor([10002, 10002, 10002, 10002, 10002, 10002, 10002, 10002, 10002, 10002,
        10002, 10002, 10002, 10002, 10002, 10002, 10002, 10002, 10002, 10002,
        10002, 10002, 10002, 10002, 10002, 10002, 10002, 10002, 10002, 10002,
        10002, 10002, 10002, 10002, 10002, 10002, 10002, 10002, 10002, 10002,
        10002, 10002, 10002, 10002, 10002, 10002, 10002, 10002, 10002, 10002,
        10002, 10002, 10002, 10002, 10002, 10002, 10002, 10002, 10002, 10002,
        10002, 10002, 10002], device='cuda:0')

In [None]:
# Look up the entry stored under id 31 in the i2w table.
i2w[31]