In [1]:
import numpy as np
import torch
import torch.nn as nn
import random
import time
from torch.nn import functional as F
from torch.autograd import Variable
from torch import optim
from rouge import Rouge
from data import *
from utils import *

## Tokens:
e.g.
```
[["Musicians to tackle US red tape Musicians ' groups are to tackle US visa regulations which are blamed for hindering",
  "Nigel McCune from the Musicians ' Union said British musicians"],
 ["U2 's desire to be number one U2 , who have won three prestigious Grammy Awards for their hit Vertigo",
  'But they still want more.They have to want to be'],
 ["Rocker Doherty in on-stage fight Rock singer Pete Doherty has been involved in a fight with his band 's guitarist",
  'Babyshambles , which he formed after his acrimonious departure from']]
```

In [2]:
train, dev = load_datasets('./Datasets/BBC_News_20_10.pkl', './Datasets/BBC_News_20_10.pkl')

## Tokens Index:
e.g.
```
[Musicians to tackle US red tape Musicians ' groups are to tackle US visa regulations which are blamed for hindering => Nigel McCune from the Musicians ' Union said British musicians
    indexed as: [2, 3, 4, 5, 6, 7, 2, 8, 9, 10, 3, 4, 5, 11, 12, 13, 10, 14, 15, 16] => [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 2],
 U2 's desire to be number one U2 , who have won three prestigious Grammy Awards for their hit Vertigo => But they still want more.They have to want to be
    indexed as: [17, 18, 19, 3, 20, 21, 22, 17, 23, 24, 25, 26, 27, 28, 29, 30, 15, 31, 32, 33] => [13, 14, 15, 16, 17, 18, 19, 16, 19, 20, 2],
 Rocker Doherty in on-stage fight Rock singer Pete Doherty has been involved in a fight with his band 's guitarist => Babyshambles , which he formed after his acrimonious departure from
    indexed as: [34, 35, 36, 37, 38, 39, 40, 41, 35, 42, 43, 44, 36, 45, 38, 46, 47, 48, 18, 49] => [21, 22, 23, 24, 25, 26, 27, 28, 29, 5, 2]]
```

In [3]:
train_data_indexed, dev_data_indexed, input_indexer, output_indexer = index_datasets(train, dev)

## Padding:
- Pad the train/dev input vectors to the max length of the train/dev input documents.
- Pad the train/dev output vectors to the max length of the train/dev output summerization.

![](https://i.imgur.com/gGlkEEF.png)

In [4]:
def make_padded_input_tensor(exs, input_indexer, max_len):
    return np.array([[ex.x_indexed[i] if i < len(ex.x_indexed) else input_indexer.index_of(PAD_SYMBOL)
                        for i in range(0, max_len)] for ex in exs])

In [5]:
def make_padded_output_tensor(exs, output_indexer, max_len):
    return np.array([[ex.y_indexed[i] if i < len(ex.y_indexed) else output_indexer.index_of(PAD_SYMBOL)
                        for i in range(0, max_len)] for ex in exs])

## Batch

In [6]:
def batch_data(input_array, batch_size=2):
    input_batches = []
    batch_num = (int)(input_array.shape[0] / batch_size)
    start = 0
    for i in range(batch_num):
        input_batches.append(torch.from_numpy(input_array[start:start+batch_size, :]))
        start += batch_size
    if start != input_array.shape[0]:
        input_batches.append(torch.from_numpy(input_array[start:, :]))
    return input_batches

## Embedding

In [7]:
class EmbeddingLayer(nn.Module):
    # Parameters: dimension of the word embeddings, number of words, and the dropout rate to apply
    # (0.2 is often a reasonable value)
    def __init__(self, input_dim, full_dict_size, embedding_dropout_rate):
        super(EmbeddingLayer, self).__init__()
        self.dropout = nn.Dropout(embedding_dropout_rate)
        self.word_embedding = nn.Embedding(full_dict_size, input_dim)

    # Takes either a non-batched input [sent len x input_dim] or a batched input
    # [batch size x sent len x input dim]
    def forward(self, input):
        embedded_words = self.word_embedding(input)
        final_embeddings = self.dropout(embedded_words)
        return final_embeddings

## Encoder

In [8]:
# One-layer RNN encoder for batched inputs -- handles multiple sentences at once. You're free to call it with a
# leading dimension of 1 (batch size 1) but it does expect this dimension.
class RNNEncoder(nn.Module):
    # Parameters: input size (should match embedding layer), hidden size for the LSTM, dropout rate for the RNN,
    # and a boolean flag for whether or not we're using a bidirectional encoder
    def __init__(self, input_size, hidden_size, dropout, bidirect):
        super(RNNEncoder, self).__init__()
        self.bidirect = bidirect
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.reduce_h_W = nn.Linear(hidden_size * 2, hidden_size, bias=True)
        self.reduce_c_W = nn.Linear(hidden_size * 2, hidden_size, bias=True)
        self.rnn = nn.LSTM(input_size, hidden_size, num_layers=1, batch_first=True,
                               dropout=dropout, bidirectional=self.bidirect)
        self.init_weight()

    # Initializes weight matrices using Xavier initialization
    def init_weight(self):
        nn.init.xavier_uniform_(self.rnn.weight_hh_l0, gain=1)
        nn.init.xavier_uniform_(self.rnn.weight_ih_l0, gain=1)
        if self.bidirect:
            nn.init.xavier_uniform_(self.rnn.weight_hh_l0_reverse, gain=1)
            nn.init.xavier_uniform_(self.rnn.weight_ih_l0_reverse, gain=1)
        nn.init.constant_(self.rnn.bias_hh_l0, 0)
        nn.init.constant_(self.rnn.bias_ih_l0, 0)
        if self.bidirect:
            nn.init.constant_(self.rnn.bias_hh_l0_reverse, 0)
            nn.init.constant_(self.rnn.bias_ih_l0_reverse, 0)

    def get_output_size(self):
        return self.hidden_size * 2 if self.bidirect else self.hidden_size

    def sent_lens_to_mask(self, lens, max_length):
        return torch.from_numpy(np.asarray([[1 if j < lens.data[i].item() else 0 for j in range(0, max_length)] for i in range(0, lens.shape[0])]))

    # embedded_words should be a [batch size x sent len x input dim] tensor
    # input_lens is a tensor containing the length of each input sentence
    # Returns output (each word's representation), context_mask (a mask of 0s and 1s
    # reflecting where the model's output should be considered), and h_t, a *tuple* containing
    # the final states h and c from the encoder for each sentence.
    def forward(self, embedded_words, input_lens):
        # Takes the embedded sentences, "packs" them into an efficient Pytorch-internal representation
        packed_embedding = nn.utils.rnn.pack_padded_sequence(embedded_words, input_lens, batch_first=True)

        # Runs the RNN over each sequence. Returns output at each position as well as the last vectors of the RNN
        # state for each sentence (first/last vectors for bidirectional)
        output, hn = self.rnn(packed_embedding)
        # print('kdjfksdjfs: ',hn[0].shape)

        # Unpacks the Pytorch representation into normal tensors
        output, sent_lens = nn.utils.rnn.pad_packed_sequence(output)
        # print('sent_lens:', sent_lens)
        # print('input_lens:', input_lens)
        max_length = input_lens.data[0].item()
        context_mask = self.sent_lens_to_mask(sent_lens, max_length)

        # Grabs the encoded representations out of hn, which is a weird tuple thing.
        # Note: if you want multiple LSTM layers, you'll need to change this to consult the penultimate layer
        # or gather representations from all layers.
        if self.bidirect:
            h, c = hn[0], hn[1]          # [2, 20, 200]
            # print('encoder hidden:----- ', h.shape)
            # print('encoder cell:----- ', c.shape)
            # Grab the representations from forward and backward LSTMs
            h_, c_ = torch.cat((h[0], h[1]), dim=1), torch.cat((c[0], c[1]), dim=1)      # [20, 400]
            # print('kdjfksdddddddddddjfs: ',h_.shape)
            # Reduce them by multiplying by a weight matrix so that the hidden size sent to the decoder is the same
            # as the hidden size in the encoder
            new_h = self.reduce_h_W(h_)
            new_c = self.reduce_c_W(c_)
            h_t = (new_h, new_c)
        else:
            h, c = hn[0][0], hn[1][0]
            h_t = (h, c)
        return (output, context_mask, h_t)

## Attention-based Decoder

In [9]:
class AttnRNNDecoder(nn.Module):
    def __init__(self, attn_model, input_size, hidden_size, output_size, dropout, bidirect):
        super(AttnRNNDecoder, self).__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout = dropout
        self.bidirect = bidirect
        self.rnn = nn.LSTM(input_size, hidden_size, num_layers=1, 
                                dropout=dropout, bidirectional=bidirect)
        self.out = nn.Linear(hidden_size, output_size)
        self.concat = nn.Linear(hidden_size * 2 + hidden_size, hidden_size)
        self.linear = nn.Linear(hidden_size * 2, hidden_size)
        self.init_weight()
        
    # Initializes weight matrices using Xavier initialization
    def init_weight(self):
        nn.init.xavier_uniform_(self.rnn.weight_hh_l0, gain=1)
        nn.init.xavier_uniform_(self.rnn.weight_ih_l0, gain=1)
        if self.bidirect:
            nn.init.xavier_uniform_(self.rnn.weight_hh_l0_reverse, gain=1)
            nn.init.xavier_uniform_(self.rnn.weight_ih_l0_reverse, gain=1)
        nn.init.constant_(self.rnn.bias_hh_l0, 0)
        nn.init.constant_(self.rnn.bias_ih_l0, 0)
        if self.bidirect:
            nn.init.constant_(self.rnn.bias_hh_l0_reverse, 0)
            nn.init.constant_(self.rnn.bias_ih_l0_reverse, 0)


    def forward(self, embedded_words, dec_hidden, enc_outputs, context_mask):
        embedded_words = embedded_words.view(1, embedded_words.size(0), embedded_words.size(1))
        context_mask = context_mask.type(torch.uint8).unsqueeze(1)     # batch_size, 1, sent_lens
  
        rnn_output, hn = self.rnn(embedded_words, dec_hidden)           # 1, batch_size, hidden_size
        enc_single_dim = self.linear(enc_outputs).transpose(0, 1).transpose(1, 2)   # batch_size, hidden_size, sent_len
        attn_scores = rnn_output.transpose(0, 1).bmm(enc_single_dim)      # batch_size, 1, sent_lens
        attn_scores.data.masked_fill(context_mask == 0, float('inf'))      # batch_size, 1, sent_lens
        attn_weights = F.softmax(attn_scores.squeeze(1), dim=1).unsqueeze(1)  # batch_size, 1, sent_lens
        context = attn_weights.bmm(enc_outputs.transpose(0, 1))      # batch_size, 1, hidden_size * num_directions

        # rnn_output = rnn_output.squeeze(0)
        concat_input = torch.cat((context, rnn_output.transpose(0, 1)), dim=-1)     # batch_size, 1, enc_hidden_size * num_directions + dec_hidden_size
        concat_output = torch.tanh(self.concat(concat_input))                       # batch_size, 1, dec_hidden_size
        output = self.out(concat_output).squeeze(1)                                 # batch_size, output_size

        return (output, hn, attn_weights)

## Encoder to Decoder

In [10]:
def encode_input_for_decoder(x_tensor, inp_lens_tensor, model_input_emb, model_enc):
    input_emb = model_input_emb.forward(x_tensor)
    (enc_output_each_word, enc_context_mask, enc_final_states) = model_enc.forward(input_emb, inp_lens_tensor)
    enc_final_states_reshaped = (enc_final_states[0].unsqueeze(0), enc_final_states[1].unsqueeze(0))
    return (enc_output_each_word, enc_context_mask, enc_final_states_reshaped)

## Loss Function

In [16]:
# Implementation of loss function: masked cross entropy
# Reference to https://github.com/spro/practical-pytorch, make some modifications
def masked_cross_entropy(logits, target, length, context_mask):
    logits_flat = logits.view(-1, logits.size(-1))                  # batch * max_len, num_classes
    log_probs_flat = F.log_softmax(logits_flat, dim=-1)    # batch * max_len, num_classes
    target_flat = target.view(-1, 1)                                # batch * max_len, 1
    losses_flat = -torch.gather(log_probs_flat, dim=1, index=target_flat)    # batch * max_len, 1
    losses = losses_flat.view(*target.size())     # batch, max_len
    losses = losses * context_mask.float()
    loss = losses.sum() / length.float().sum()
    return loss

## Training Step

In [30]:
BATCH_SIZE = 64
lr = 0.0005
input_dim = 100
output_dim = 100
hidden_size = 200
emb_dropout = 0.2
rnn_dropout = 0.2
bidirectional = True
num_epochs = 5
teacher_forcing_ratio = 1

**Create indexed input/output for training**
- X_tensors_batch/Y_tensors_batch, list[array: [batch_size, sent_len], batch_num]
- inp_lens_batch/oup_lens_batch, list[array: [batch_size,], batch_num]

In [13]:
# Create indexed input/output for training
train_data_indexed.sort(key=lambda ex: len(ex.x_indexed), reverse=True)
input_train_max_len = np.max(np.asarray([len(ex.x_indexed) for ex in train_data_indexed]))
all_train_input_data = make_padded_input_tensor(train_data_indexed, input_indexer, input_train_max_len).astype(np.int64)

output_train_max_len = np.max(np.asarray([len(ex.y_indexed) for ex in train_data_indexed]))
all_train_output_data = make_padded_output_tensor(train_data_indexed, output_indexer, output_train_max_len).astype(np.int64)

X_tensors_batch = batch_data(all_train_input_data, BATCH_SIZE)   # batch_num, batch_size, sent_len
Y_tensors_batch = batch_data(all_train_output_data, BATCH_SIZE)  # batch_num, batch_size, sent_len
inp_lens_batch = [torch.tensor([torch.sum(X_tensor != 0) for X_tensor in X_tensors]) for X_tensors in X_tensors_batch]  # batch_num, batch_size
oup_lens_batch = [torch.tensor([torch.sum(Y_tensor != 0) for Y_tensor in Y_tensors]) for Y_tensors in Y_tensors_batch]  # batch_num, batch_size

**Create model**
- model_input_emb/model_output_emb: embedding layer
- model_enc/model_dec: encoder/decoder
- optimizers: encoder/decoder

In [14]:
# Create model
model_input_emb = EmbeddingLayer(input_dim, len(input_indexer), emb_dropout)
model_enc = RNNEncoder(input_dim, hidden_size, rnn_dropout, bidirectional)
model_output_emb = EmbeddingLayer(output_dim, len(output_indexer), emb_dropout)
model_dec = AttnRNNDecoder(attn_model='general', input_size=output_dim, hidden_size=hidden_size, output_size=len(output_indexer), dropout=rnn_dropout, bidirect=False)
enc_optimizer = optim.Adam(model_enc.parameters(), lr=lr)
dec_optimizer = optim.Adam(model_dec.parameters(), lr=lr)

  "num_layers={}".format(dropout, num_layers))


**Train Iteration**

In [31]:
start = time.time()
for epoch in range(0, num_epochs):
    print('--------------------- Epoch %d ---------------------'%epoch)
    for X_tensors, Y_tensors, inp_lens_tensor, oup_lens_tensor in zip(X_tensors_batch, Y_tensors_batch, inp_lens_batch, oup_lens_batch):

        model_enc.train()
        model_dec.train()

        enc_optimizer.zero_grad()
        dec_optimizer.zero_grad()

        enc_outputs, enc_context_mask, enc_hidden = encode_input_for_decoder(X_tensors, inp_lens_tensor, model_input_emb, model_enc)

        dec_hidden = enc_hidden
        dec_input = model_output_emb.forward(Variable(torch.LongTensor([output_indexer.index_of(SOS_SYMBOL)] * X_tensors.shape[0])))
        all_dec_outputs = Variable(torch.zeros(output_train_max_len, X_tensors.shape[0], model_dec.output_size))       # sent_len, batch_size, output_size
        use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
        if use_teacher_forcing:
            for idx in range(output_train_max_len):
#                 dec_output, dec_hidden = model_dec.forward(dec_input, dec_hidden)
                dec_output, dec_hidden, dec_attn = model_dec.forward(dec_input, dec_hidden, enc_outputs, enc_context_mask)
                all_dec_outputs[idx] = dec_output
#                 print(idx)
                dec_input = model_output_emb.forward(Y_tensors[:, idx])
        else:
            for idx in range(output_train_max_len):
#                 dec_output, dec_hidden = model_dec.forward(dec_input, dec_hidden)
                dec_output, dec_hidden, dec_attn = model_dec.forward(dec_input, dec_hidden, enc_outputs, enc_context_mask)
                all_dec_outputs[idx] = dec_output
                max_prob_idx = torch.argmax(dec_output, dim=1)
                dec_input = model_output_emb.forward(max_prob_idx)

        all_context_mask = torch.from_numpy(np.asarray([[1 if j < oup_lens_tensor.data[i].item() \
            else 0 for j in range(0, Y_tensors.size(1))] for i in range(0, oup_lens_tensor.shape[0])], dtype=np.uint8))
        loss = masked_cross_entropy(all_dec_outputs.transpose(0, 1).contiguous(), Y_tensors, oup_lens_tensor, all_context_mask)       # batch_size, sent_len, output_size
                                                                                                                                      # batch_size, sent_len
        loss.backward()

        enc_optimizer.step()
        dec_optimizer.step()

        print('loss', loss.item())

--------------------- Epoch 0 ---------------------
loss 6.777652263641357
loss 6.843471050262451
loss 6.698133945465088
loss 6.744834899902344
loss 6.721421718597412
loss 6.838589668273926
loss 6.695148944854736
loss 6.942899703979492
loss 6.817431926727295
loss 6.786461353302002
loss 6.827751159667969
loss 6.839561462402344
loss 6.797379970550537
loss 6.82639741897583
loss 6.940610885620117
loss 6.849934101104736
loss 6.964906215667725
loss 6.892034530639648
loss 6.751857280731201
loss 6.733532905578613
loss 6.709282398223877
loss 6.818923473358154
loss 6.466118335723877
loss 6.362329006195068
loss 6.384599685668945
loss 6.424737453460693
loss 6.372428894042969
loss 6.48193359375
loss 6.427491664886475
loss 6.649287223815918
loss 6.505488395690918
loss 6.674210071563721
loss 6.431832790374756
loss 6.556762218475342
loss 6.461725234985352
--------------------- Epoch 1 ---------------------
loss 6.5506591796875
loss 6.631466865539551
loss 6.475909233093262
loss 6.514857769012451
loss 6

## Evaluation Step

**Create indexed input/output for development**

In [22]:
# Create indexed input/output for dev
dev_data_indexed.sort(key=lambda ex: len(ex.x_indexed), reverse=True)
input_dev_max_len = np.max(np.asarray([len(ex.x_indexed) for ex in dev_data_indexed]))
all_dev_input_data = make_padded_input_tensor(dev_data_indexed, input_indexer, input_dev_max_len).astype(np.int64)
output_dev_max_len = np.max(np.asarray([len(ex.y_indexed) for ex in dev_data_indexed]))
X_tensors_batch_dev = batch_data(all_dev_input_data, BATCH_SIZE)   # batch_num, batch_size, sent_len
inp_lens_batch_dev = [torch.tensor([torch.sum(X_tensor != 0) for X_tensor in X_tensors]) for X_tensors in X_tensors_batch_dev]  # batch_num, batch_size

In [32]:
best_data = []
model_enc.eval()
model_dec.eval()
for X_tensors, inp_lens_tensor in zip(X_tensors_batch_dev, inp_lens_batch_dev):
    enc_outputs, enc_context_mask, enc_hidden = encode_input_for_decoder(X_tensors, inp_lens_tensor, model_input_emb, model_enc)
    dec_hidden = enc_hidden
    dec_input = model_output_emb.forward(Variable(torch.LongTensor([output_indexer.index_of(SOS_SYMBOL)] * X_tensors.shape[0])))
    all_dec_outputs = Variable(torch.zeros(output_dev_max_len, X_tensors.shape[0], model_dec.output_size))       # sent_len, batch_size, output_size
    for idx in range(output_dev_max_len):
        dec_output, dec_hidden, dec_attn = model_dec.forward(dec_input, dec_hidden, enc_outputs, enc_context_mask)
        all_dec_outputs[idx] = dec_output
        max_prob_idx = torch.argmax(dec_output, dim=1)
        dec_input = model_output_emb.forward(max_prob_idx)
    for best_sent in torch.argmax(all_dec_outputs, dim=2).transpose(0, 1).contiguous():
        best_ex = []
        for word_idx in best_sent:            # don't need to include EOS tok
            if word_idx.item() == output_indexer.index_of(EOS_SYMBOL):
                break
            best_ex.append(output_indexer.get_object(word_idx.item()))     # pred tok
        best_data.append(best_ex)
            
rouge = Rouge()
for test_ex, best_ex in zip(dev_data_indexed, best_data):
    test_str = ' '.join(test_ex.y_tok)
    best_str = ' '.join(best_ex)
    print(best_str)
    scores = rouge.get_scores(best_str, test_str)
    print(scores)

The said the the the the the the the the
[{'rouge-1': {'f': 0.3076923041420118, 'p': 0.6666666666666666, 'r': 0.2}, 'rouge-2': {'f': 0.0, 'p': 0.0, 'r': 0.0}, 'rouge-l': {'f': 0.10613437195724833, 'p': 0.3333333333333333, 'r': 0.1}}]
The said the the the the the the the the
[{'rouge-1': {'f': 0.0, 'p': 0.0, 'r': 0.0}, 'rouge-2': {'f': 0.0, 'p': 0.0, 'r': 0.0}, 'rouge-l': {'f': 0.0, 'p': 0.0, 'r': 0.0}}]
The `` `` said the the the the the the
[{'rouge-1': {'f': 0.0, 'p': 0.0, 'r': 0.0}, 'rouge-2': {'f': 0.0, 'p': 0.0, 'r': 0.0}, 'rouge-l': {'f': 0.0, 'p': 0.0, 'r': 0.0}}]
The said the the the the the the the the
[{'rouge-1': {'f': 0.0, 'p': 0.0, 'r': 0.0}, 'rouge-2': {'f': 0.0, 'p': 0.0, 'r': 0.0}, 'rouge-l': {'f': 0.0, 'p': 0.0, 'r': 0.0}}]
The said the the the the the the the the
[{'rouge-1': {'f': 0.15384615029585808, 'p': 0.3333333333333333, 'r': 0.1}, 'rouge-2': {'f': 0.0, 'p': 0.0, 'r': 0.0}, 'rouge-l': {'f': 0.10613437195724833, 'p': 0.3333333333333333, 'r': 0.1}}]
The `` `` the 

[{'rouge-1': {'f': 0.15384615029585808, 'p': 0.3333333333333333, 'r': 0.1}, 'rouge-2': {'f': 0.0, 'p': 0.0, 'r': 0.0}, 'rouge-l': {'f': 0.10613437195724833, 'p': 0.3333333333333333, 'r': 0.1}}]
The said the the the the the the the the
[{'rouge-1': {'f': 0.15384615029585808, 'p': 0.3333333333333333, 'r': 0.1}, 'rouge-2': {'f': 0.0, 'p': 0.0, 'r': 0.0}, 'rouge-l': {'f': 0.10613437195724833, 'p': 0.3333333333333333, 'r': 0.1}}]
The said the the the the the the the the
[{'rouge-1': {'f': 0.0, 'p': 0.0, 'r': 0.0}, 'rouge-2': {'f': 0.0, 'p': 0.0, 'r': 0.0}, 'rouge-l': {'f': 0.0, 'p': 0.0, 'r': 0.0}}]
The `` `` the the the the the the the
[{'rouge-1': {'f': 0.15384615029585808, 'p': 0.3333333333333333, 'r': 0.1}, 'rouge-2': {'f': 0.0, 'p': 0.0, 'r': 0.0}, 'rouge-l': {'f': 0.10613437195724833, 'p': 0.3333333333333333, 'r': 0.1}}]
The said the the the the the the the the
[{'rouge-1': {'f': 0.15384615029585808, 'p': 0.3333333333333333, 'r': 0.1}, 'rouge-2': {'f': 0.0, 'p': 0.0, 'r': 0.0}, 'rouge

The said the the the the the the the the
[{'rouge-1': {'f': 0.0, 'p': 0.0, 'r': 0.0}, 'rouge-2': {'f': 0.0, 'p': 0.0, 'r': 0.0}, 'rouge-l': {'f': 0.0, 'p': 0.0, 'r': 0.0}}]
The said the the the the the the the the
[{'rouge-1': {'f': 0.0, 'p': 0.0, 'r': 0.0}, 'rouge-2': {'f': 0.0, 'p': 0.0, 'r': 0.0}, 'rouge-l': {'f': 0.0, 'p': 0.0, 'r': 0.0}}]
The said the the the the the the the the
[{'rouge-1': {'f': 0.15384615029585808, 'p': 0.3333333333333333, 'r': 0.1}, 'rouge-2': {'f': 0.0, 'p': 0.0, 'r': 0.0}, 'rouge-l': {'f': 0.10613437195724833, 'p': 0.3333333333333333, 'r': 0.1}}]
The said the the the the the the the the
[{'rouge-1': {'f': 0.0, 'p': 0.0, 'r': 0.0}, 'rouge-2': {'f': 0.0, 'p': 0.0, 'r': 0.0}, 'rouge-l': {'f': 0.0, 'p': 0.0, 'r': 0.0}}]
The said the the the the the the the the
[{'rouge-1': {'f': 0.15384615029585808, 'p': 0.3333333333333333, 'r': 0.1}, 'rouge-2': {'f': 0.0, 'p': 0.0, 'r': 0.0}, 'rouge-l': {'f': 0.10613437195724833, 'p': 0.3333333333333333, 'r': 0.1}}]
The `` `` ,

In [None]:
# Create indexed input/output for training
# train_data_indexed.sort(key=lambda ex: len(ex.x_indexed), reverse=True)
# input_train_max_len = np.max(np.asarray([len(ex.x_indexed) for ex in train_data_indexed]))
# all_train_input_data = make_padded_input_tensor(train_data, input_indexer, input_train_max_len, args.reverse_input)
# output_train_max_len = np.max(np.asarray([len(ex.y_indexed) for ex in train_data]))
# all_train_output_data = make_padded_output_tensor(train_data, output_indexer, output_train_max_len)


# # Create indexed input/output for dev
# dev_data_indexed.sort(key=lambda ex: len(ex.x_indexed), reverse=True)
# input_dev_max_len = np.max(np.asarray([len(ex.x_indexed) for ex in dev_data_indexed]))
# all_dev_input_data = make_padded_input_tensor(dev_data_indexed, input_indexer, input_dev_max_len, args.reverse_input)
# output_dev_max_len = np.max(np.asarray([len(ex.y_indexed) for ex in dev_data_indexed]))
# all_dev_output_data = make_padded_output_tensor(dev_data_indexed, output_indexer, output_dev_max_len)